Android Framework Internals -- Analyzing the Flow of Obtaining service_manager


In the previous article we took a close look at the Binder driver source. Once service_manager starts, it opens the binder driver and establishes the memory mapping. In this article we continue with the service_manager source to see how it registers itself and how data gets passed around.

1 service_manager Registration Flow

Let's go back to service_manager's entry function. We already analyzed what binder_open does, so let's keep reading:

```C
int main(int argc, char **argv)
{
    struct binder_state *bs;
    union selinux_callback cb;
    char *driver;

if (argc > 1) {
    driver = argv[1];
} else {
    driver = "/dev/binder";
}

bs = binder_open(driver, 128*1024);
//......

if (binder_become_context_manager(bs)) {
    ALOGE("cannot become context manager (%s)\n", strerror(errno));
    return -1;
}

cb.func_audit = audit_callback;
selinux_set_callback(SELINUX_CB_AUDIT, cb);
cb.func_log = selinux_log_callback;
selinux_set_callback(SELINUX_CB_LOG, cb);

#ifdef VENDORSERVICEMANAGER

sehandle = selinux_android_vendor_service_context_handle();

#else

sehandle = selinux_android_service_context_handle();

#endif

selinux_status_open(true);

if (sehandle == NULL) {
    ALOGE("SELinux: Failed to acquire sehandle. Aborting.\n");
    abort();
}

if (getcon(&service_manager_context) != 0) {
    ALOGE("SELinux: Failed to acquire service_manager context. Aborting.\n");
    abort();
}


binder_loop(bs, svcmgr_handler);

return 0;

}
```

After opening the Binder driver, we get back a binder_state handle bs. main then calls binder_become_context_manager, which makes service_manager the system-wide "housekeeper" (context manager) of services. Let's see what binder_become_context_manager actually does.

1.1 binder_become_context_manager Source Analysis

```C
int binder_become_context_manager(struct binder_state *bs)
{
    // hand the BINDER_SET_CONTEXT_MGR command to the binder driver
    return ioctl(bs->fd, BINDER_SET_CONTEXT_MGR, 0);
}
```

binder_become_context_manager does nothing but call ioctl, and from that alone we know the call must land in the driver's binder_ioctl.

```C
static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    //.....
    switch (cmd) {
    case BINDER_WRITE_READ:
        ret = binder_ioctl_write_read(filp, cmd, arg, thread);
        if (ret)
            goto err;
        break;
    case BINDER_SET_MAX_THREADS:
        if (copy_from_user(&proc->max_threads, ubuf,
                           sizeof(proc->max_threads))) {
            ret = -EINVAL;
            goto err;
        }
        break;
    case BINDER_SET_CONTEXT_MGR:
        ret = binder_ioctl_set_ctx_mgr(filp);
        if (ret)
            goto err;
        ret = security_binder_set_context_mgr(proc->tsk);
        if (ret < 0)
            goto err;
        break;
    case BINDER_THREAD_EXIT:
        binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n",
                     proc->pid, thread->pid);
        binder_free_thread(proc, thread);
        thread = NULL;
        break;
    case BINDER_VERSION: {
        struct binder_version __user *ver = ubuf;

        if (size != sizeof(struct binder_version)) {
            ret = -EINVAL;
            goto err;
        }
        if (put_user(BINDER_CURRENT_PROTOCOL_VERSION,
                     &ver->protocol_version)) {
            ret = -EINVAL;
            goto err;
        }
        break;
    }
    default:
        ret = -EINVAL;
        goto err;
    }
    ret = 0;
}
```

**The cmd passed in by service_manager is BINDER_SET_CONTEXT_MGR**, so from the switch above we can see that binder_ioctl_set_ctx_mgr is the method being called:

```C
static int binder_ioctl_set_ctx_mgr(struct file *filp)
{
    int ret = 0;
    struct binder_proc *proc = filp->private_data;
    //......
    binder_context_mgr_node = binder_new_node(proc, 0, 0);
    if (binder_context_mgr_node == NULL) {
        ret = -ENOMEM;
        goto out;
    }
    binder_context_mgr_node->local_weak_refs++;
    binder_context_mgr_node->local_strong_refs++;
    binder_context_mgr_node->has_strong_ref = 1;
    binder_context_mgr_node->has_weak_ref = 1;
out:
    return ret;
}
```

This method first obtains the binder_proc of the calling process, then calls binder_new_node to create a binder_node and assigns it to **binder_context_mgr_node. This node is referenced throughout the Binder driver, so it is kept as a global object so the driver can locate service_manager quickly.**

```C
static struct binder_node *binder_new_node(struct binder_proc *proc,
                                           binder_uintptr_t ptr,
                                           binder_uintptr_t cookie)
{
    struct rb_node **p = &proc->nodes.rb_node;
    struct rb_node *parent = NULL;
    struct binder_node *node;

    while (*p) {
        parent = *p;
        node = rb_entry(parent, struct binder_node, rb_node);

        if (ptr < node->ptr)
            p = &(*p)->rb_left;
        else if (ptr > node->ptr)
            p = &(*p)->rb_right;
        else
            return NULL;
    }

    node = kzalloc(sizeof(*node), GFP_KERNEL);
    if (node == NULL)
        return NULL;
    binder_stats_created(BINDER_STAT_NODE);
    rb_link_node(&node->rb_node, parent, p);
    rb_insert_color(&node->rb_node, &proc->nodes);
    node->debug_id = ++binder_last_id;
    node->proc = proc;
    node->ptr = ptr;
    node->cookie = cookie;
    node->work.type = BINDER_WORK_NODE;
    INIT_LIST_HEAD(&node->work.entry);
    INIT_LIST_HEAD(&node->async_todo);
    binder_debug(BINDER_DEBUG_INTERNAL_REFS,
                 "%d:%d node %d u%016llx c%016llx created\n",
                 proc->pid, current->pid, node->debug_id,
                 (u64)node->ptr, (u64)node->cookie);
    return node;
}
```

So the main work of binder_become_context_manager is:\
(1) creating a binder_node for service_manager and caching it in the global binder_context_mgr_node, so the driver can find the context manager quickly;\
(2) initializing the node's work entry and async_todo queue, which behave much like a MessageQueue.

1.2 binder_loop Source Analysis

After making service_manager the context manager, main calls binder_loop, which is somewhat like Looper's loop method. The source:

```C
void binder_loop(struct binder_state *bs, binder_handler func)
{
    int res;
    struct binder_write_read bwr;
    uint32_t readbuf[32];

    // initialization
    bwr.write_size = 0;
    bwr.write_consumed = 0;
    bwr.write_buffer = 0;

    // enter the looper loop
    readbuf[0] = BC_ENTER_LOOPER;
    //①
    binder_write(bs, readbuf, sizeof(uint32_t));

for (;;) {
    bwr.read_size = sizeof(readbuf);
    bwr.read_consumed = 0;
    bwr.read_buffer = (uintptr_t) readbuf;
    //②
    res = ioctl(bs->fd, BINDER_WRITE_READ, &bwr);

    if (res < 0) {
        ALOGE("binder_loop: ioctl failed (%s)\n", strerror(errno));
        break;
    }

    res = binder_parse(bs, 0, (uintptr_t) readbuf, bwr.read_consumed, func);
    if (res == 0) {
        ALOGE("binder_loop: unexpected reply?!\n");
        break;
    }
    if (res < 0) {
        ALOGE("binder_loop: io error %d %s\n", res, strerror(errno));
        break;
    }
}

}
```

binder_loop declares a read/write struct, binder_write_read, and initializes it.\
**①: It then calls binder_write, issuing the BC_ENTER_LOOPER command:**

```C
int binder_write(struct binder_state *bs, void *data, size_t len)
{
    struct binder_write_read bwr;
    int res;

    bwr.write_size = len;
    bwr.write_consumed = 0;
    bwr.write_buffer = (uintptr_t) data;
    bwr.read_size = 0;
    bwr.read_consumed = 0;
    bwr.read_buffer = 0;
    // the actual read/write against the driver happens here
    res = ioctl(bs->fd, BINDER_WRITE_READ, &bwr);
    if (res < 0) {
        fprintf(stderr,"binder_write: ioctl failed (%s)\n",
                strerror(errno));
    }
    return res;

}
```

So once again we end up in the driver's binder_ioctl, this time with the BINDER_WRITE_READ command, which dispatches to binder_ioctl_write_read:

```C
static int binder_ioctl_write_read(struct file *filp,
                                   unsigned int cmd, unsigned long arg,
                                   struct binder_thread *thread)
{
    int ret = 0;
    struct binder_proc *proc = filp->private_data;
    unsigned int size = _IOC_SIZE(cmd);
    void __user *ubuf = (void __user *)arg;
    struct binder_write_read bwr;

    if (size != sizeof(struct binder_write_read)) {
        ret = -EINVAL;
        goto out;
    }
    if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
        ret = -EFAULT;
        goto out;
    }
    binder_debug(BINDER_DEBUG_READ_WRITE,
                 "%d:%d write %lld at %016llx, read %lld at %016llx\n",
                 proc->pid, thread->pid,
                 (u64)bwr.write_size, (u64)bwr.write_buffer,
                 (u64)bwr.read_size, (u64)bwr.read_buffer);

    if (bwr.write_size > 0) {
        ret = binder_thread_write(proc, thread,
                                  bwr.write_buffer, bwr.write_size,
                                  &bwr.write_consumed);
        trace_binder_write_done(ret);
        if (ret < 0) {
            bwr.read_consumed = 0;
            if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
                ret = -EFAULT;
            goto out;
        }
    }
    if (bwr.read_size > 0) {
        ret = binder_thread_read(proc, thread,
                                 bwr.read_buffer, bwr.read_size,
                                 &bwr.read_consumed,
                                 filp->f_flags & O_NONBLOCK);
        trace_binder_read_done(ret);
        if (!list_empty(&proc->todo))
            wake_up_interruptible(&proc->wait);
        if (ret < 0) {
            if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
                ret = -EFAULT;
            goto out;
        }
    }
    binder_debug(BINDER_DEBUG_READ_WRITE,
                 "%d:%d wrote %lld of %lld, read return %lld of %lld\n",
                 proc->pid, thread->pid,
                 (u64)bwr.write_consumed, (u64)bwr.write_size,
                 (u64)bwr.read_consumed, (u64)bwr.read_size);
    if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
        ret = -EFAULT;
        goto out;
    }
out:
    return ret;
}
```

The driver decides whether to write or read based on whether write_size or read_size in the binder_write_read struct is greater than 0. Since binder_write set write_size > 0, a write is performed, which maps to binder_thread_write. That function likewise dispatches on the command code; we only need to look at the BC_ENTER_LOOPER block:

```C
case BC_ENTER_LOOPER:
    binder_debug(BINDER_DEBUG_THREADS,
                 "%d:%d BC_ENTER_LOOPER\n",
                 proc->pid, thread->pid);
    if (thread->looper & BINDER_LOOPER_STATE_REGISTERED) {
        thread->looper |= BINDER_LOOPER_STATE_INVALID;
        binder_user_error("%d:%d ERROR: BC_ENTER_LOOPER called after BC_REGISTER_LOOPER\n",
                          proc->pid, thread->pid);
    }
    thread->looper |= BINDER_LOOPER_STATE_ENTERED;
    break;
```

All this does is set the BINDER_LOOPER_STATE_ENTERED flag on the thread, telling the driver that it has entered the looper loop.

②: service_manager then enters an infinite loop and calls ioctl again, this time with\
read_size > 0 and read_consumed = 0,\
so a read can be performed. Let's look at binder_thread_read:

```C
static int binder_thread_read(struct binder_proc *proc,
                              struct binder_thread *thread,
                              binder_uintptr_t binder_buffer, size_t size,
                              binder_size_t *consumed, int non_block)
{
    void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
    void __user *ptr = buffer + *consumed;
    void __user *end = buffer + size;

    int ret = 0;
    int wait_for_proc_work;

    if (*consumed == 0) {
        if (put_user(BR_NOOP, (uint32_t __user *)ptr))
            return -EFAULT;
        ptr += sizeof(uint32_t);
    }
    // here wait_for_proc_work will be true
    wait_for_proc_work = thread->transaction_stack == NULL &&
                         list_empty(&thread->todo);
    //......
    if (wait_for_proc_work) {
        //......
        if (non_block) {
            if (!binder_has_proc_work(proc, thread))
                ret = -EAGAIN;
        } else
            ret = wait_event_freezable_exclusive(proc->wait,
                        binder_has_proc_work(proc, thread));
    } else {
        if (non_block) {
            if (!binder_has_thread_work(thread))
                ret = -EAGAIN;
        } else
            ret = wait_event_freezable(thread->wait,
                        binder_has_thread_work(thread));
    }
```

(1) Because the loop resets read_consumed = 0, binder_thread_read first puts a BR_NOOP code into the read buffer;\
(2) wait_for_proc_work is true, because at this point there is no work in the queues yet;\
(3) after a series of checks, the thread calls wait_event_freezable_exclusive and blocks; when messages arrive later, it wakes up to process them.

So binder_loop mainly does two things:\
(1) sets the looper state to BINDER_LOOPER_STATE_ENTERED, meaning the thread has entered the looper loop;\
(2) inside the infinite loop, issues a read and blocks waiting for incoming work.

At this point, service_manager's registration is complete.


2 service_manager Acquisition Flow

In the typical case, registering a service with service_manager from the native layer first requires obtaining service_manager itself, so let's walk through how that is done.

2.1 The Native Layer

Here is how the native layer obtains service_manager; the source:

```C++
// http://androidxref.com/9.0.0_r3/xref/frameworks/native/libs/binder/IServiceManager.cpp

namespace android {

sp<IServiceManager> defaultServiceManager()
{
    if (gDefaultServiceManager != NULL) return gDefaultServiceManager;

{
    AutoMutex _l(gDefaultServiceManagerLock);
    while (gDefaultServiceManager == NULL) {
        gDefaultServiceManager = interface_cast<IServiceManager>(
            ProcessState::self()->getContextObject(NULL));
        if (gDefaultServiceManager == NULL)
            sleep(1);
    }
}

return gDefaultServiceManager;

}

```

When the application layer wants to register a service through the native layer, it obtains service_manager by calling defaultServiceManager. The object returned is a process-wide singleton: when gDefaultServiceManager is NULL, it is created via `interface_cast<IServiceManager>(ProcessState::self()->getContextObject(NULL))`.

2.1.1 ProcessState::self()

First, let's look at the ProcessState half of the call:

`ProcessState::self()->getContextObject(NULL)`

ProcessState's self() method simply creates (or returns) the ProcessState object:

```C++
sp<ProcessState> ProcessState::self()
{
    Mutex::Autolock _l(gProcessMutex);
    if (gProcess != NULL) {
        return gProcess;
    }
    gProcess = new ProcessState("/dev/binder");
    return gProcess;
}
```

The constructor is handed the /dev/binder path, so we know it is about to open the Binder driver; it also sets the maximum number of binder threads, DEFAULT_MAX_BINDER_THREADS = 15:

```C++
ProcessState::ProcessState(const char *driver)
    : mDriverName(String8(driver))
    , mDriverFD(open_driver(driver))
    , mVMStart(MAP_FAILED)
    , mThreadCountLock(PTHREAD_MUTEX_INITIALIZER)
    , mThreadCountDecrement(PTHREAD_COND_INITIALIZER)
    , mExecutingThreadsCount(0)
    , mMaxThreads(DEFAULT_MAX_BINDER_THREADS)
    , mStarvationStartTimeMs(0)
    , mManagesContexts(false)
    , mBinderContextCheckFunc(NULL)
    , mBinderContextUserData(NULL)
    , mThreadPoolStarted(false)
    , mThreadPoolSeq(1)
{
    if (mDriverFD >= 0) {
        // mmap the binder, providing a chunk of virtual address space to receive transactions.
        mVMStart = mmap(0, BINDER_VM_SIZE, PROT_READ,
                        MAP_PRIVATE | MAP_NORESERVE, mDriverFD, 0);
        if (mVMStart == MAP_FAILED) {
            // *sigh*
            ALOGE("Using %s failed: unable to mmap transaction memory.\n",
                  mDriverName.c_str());
            close(mDriverFD);
            mDriverFD = -1;
            mDriverName.clear();
        }
    }

LOG_ALWAYS_FATAL_IF(mDriverFD < 0, "Binder driver could not be opened.  Terminating.");

}
```

ProcessState::self() is a per-process singleton, so each process owns exactly one ProcessState. After opening the Binder driver, the constructor calls mmap to request the memory mapping from the driver.


Earlier we saw that when service_manager itself starts, it asks the kernel for only 128 KB of buffer space, whereas an ordinary process registering services maps BINDER_VM_SIZE, i.e. 1 MB minus two pages (8 KB at the usual 4 KB page size):

```C++

#define BINDER_VM_SIZE ((1 * 1024 * 1024) - sysconf(_SC_PAGE_SIZE) * 2)

```
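As a quick sanity check of that arithmetic, here is a standalone sketch (not from the Android source; it simply evaluates the same expression, assuming the typical 4096-byte page size):

```C++
#include <unistd.h>
#include <cstdio>

int main()
{
    // On most Android devices sysconf(_SC_PAGE_SIZE) returns 4096,
    // so two pages are 8 KB and the mapping size is 1 MB - 8 KB.
    long vmSize = (1 * 1024 * 1024) - sysconf(_SC_PAGE_SIZE) * 2;
    printf("BINDER_VM_SIZE = %ld bytes\n", vmSize); // 1040384 with 4 KB pages
    return 0;
}
```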

To summarize, what does ProcessState::self() do?\
(1) opens the Binder driver;\
(2) sets the maximum number of Binder threads to 15;\
(3) mmaps the transaction buffer, 1 MB - 8 KB.

ProcessState::self() is invoked each time a service comes to register, but the ProcessState it returns is created only once per process.

2.1.2 getContextObject

```C++
sp<IBinder> ProcessState::getContextObject(const sp<IBinder>& /*caller*/)
{
    return getStrongProxyForHandle(0);
}

// the actual implementation
sp<IBinder> ProcessState::getStrongProxyForHandle(int32_t handle)
{
    sp<IBinder> result;

    AutoMutex _l(mLock);

    handle_entry* e = lookupHandleLocked(handle);

    if (e != NULL) {
        IBinder* b = e->binder;
        if (b == NULL || !e->refs->attemptIncWeak(this)) {
            if (handle == 0) {
                Parcel data;
                status_t status = IPCThreadState::self()->transact(
                        0, IBinder::PING_TRANSACTION, data, NULL, 0);
                if (status == DEAD_OBJECT)
                    return NULL;
            }
            // the key point: create the BpBinder
            b = BpBinder::create(handle);
            e->binder = b;
            if (b) e->refs = b->getWeakRefs();
            result = b;
        } else {
            result.force_set(b);
            e->refs->decWeak(this);
        }
    }

    return result;

}
```

After the ProcessState is created, getContextObject is called, which really just calls getStrongProxyForHandle(0). The key point of this method is that it creates a BpBinder object and returns it.

BpBinder can be thought of as the client-side object; the Proxy in the method name is the hint that it acts as a proxy for the server side. That is to say,

`ProcessState::self()->getContextObject(NULL)` ultimately returns a BpBinder object.

2.1.3 interface_cast

Next we need to work out what kind of conversion `interface_cast<IServiceManager>(BpBinder)` performs.

```C++
template<typename INTERFACE>
inline sp<INTERFACE> interface_cast(const sp<IBinder>& obj)
{
    return INTERFACE::asInterface(obj);
}
```

interface_cast is a template function, so by its definition the following two lines are equivalent:

```C++
interface_cast<IServiceManager>(BpBinder);
IServiceManager::asInterface(BpBinder);
```

asInterface is generated by the IMPLEMENT_META_INTERFACE macro; following the expansion one more step shows that what we finally get is a BpServiceManager object:

```C++
#define IMPLEMENT_META_INTERFACE(INTERFACE, NAME)                       \
    const ::android::String16 I##INTERFACE::descriptor(NAME);           \
    const ::android::String16&                                          \
            I##INTERFACE::getInterfaceDescriptor() const {              \
        return I##INTERFACE::descriptor;                                \
    }                                                                   \
    ::android::sp<I##INTERFACE> I##INTERFACE::asInterface(              \
            const ::android::sp<::android::IBinder>& obj)               \
    {                                                                   \
        ::android::sp<I##INTERFACE> intr;                               \
        if (obj != NULL) {                                              \
            intr = static_cast<I##INTERFACE*>(                          \
                obj->queryLocalInterface(                               \
                        I##INTERFACE::descriptor).get());               \
            if (intr == NULL) {                                         \
                intr = new Bp##INTERFACE(obj);                          \
            }                                                           \
        }                                                               \
        return intr;                                                    \
    }                                                                   \
    I##INTERFACE::I##INTERFACE() { }                                    \
    I##INTERFACE::~I##INTERFACE() { }
```

In other words, what defaultServiceManager finally returns is BpServiceManager(BpBinder).
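To make that expansion concrete, here is roughly what the macro generates for IServiceManager (a hand-expanded sketch, assuming the descriptor string "android.os.IServiceManager" used by the real IMPLEMENT_META_INTERFACE(ServiceManager, ...) invocation):

```C++
// Hand-expanded sketch of IMPLEMENT_META_INTERFACE(ServiceManager,
// "android.os.IServiceManager"); simplified for readability.
::android::sp<IServiceManager> IServiceManager::asInterface(
        const ::android::sp<::android::IBinder>& obj)
{
    ::android::sp<IServiceManager> intr;
    if (obj != NULL) {
        // In the server's own process this returns the local object;
        // for a BpBinder received from the driver it returns NULL...
        intr = static_cast<IServiceManager*>(
            obj->queryLocalInterface(IServiceManager::descriptor).get());
        if (intr == NULL) {
            // ...so a BpServiceManager proxy wrapping the BpBinder is built.
            intr = new BpServiceManager(obj);
        }
    }
    return intr;
}
```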

2.1.4 BpServiceManager

Looking at BpServiceManager's constructor, it derives from the BpInterface template:

```C++
explicit BpServiceManager(const sp<IBinder>& impl)
    : BpInterface<IServiceManager>(impl)
{
}
```

BpInterface passes the BpBinder on to BpRefBase, where it is stored as the mRemote member. If you have written AIDL code, mRemote should look familiar: at the application layer it is a BinderProxy, and at the native layer it is the BpBinder.

```C++
inline BpInterface<INTERFACE>::BpInterface(const sp<IBinder>& remote)
    : BpRefBase(remote)
{
}
```

```C++
class BpRefBase : public virtual RefBase
{
protected:
    explicit BpRefBase(const sp<IBinder>& o);
    virtual ~BpRefBase();
    virtual void onFirstRef();
    virtual void onLastStrongRef(const void* id);
    virtual bool onIncStrongAttempted(uint32_t flags, const void* id);

inline  IBinder*        remote()                { return mRemote; }
inline  IBinder*        remote() const          { return mRemote; }

private:
    BpRefBase(const BpRefBase& o);
    BpRefBase& operator=(const BpRefBase& o);

    IBinder* const          mRemote;
    RefBase::weakref_type*  mRefs;
    std::atomic<int32_t>    mState;

};
```


Let's now recap how service_manager is obtained when a service registers at the native layer:

(1) defaultServiceManager is called; its main job is to create a BpServiceManager, which wraps a BpBinder inside;

(2) the BpBinder is held as BpRefBase's mRemote member, and subclasses can reach it through the remote() method; BpServiceManager inherits from BpRefBase (via BpInterface), which is how it gets hold of the BpBinder.
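To tie this together, here is a minimal usage sketch (not from the article's source; the main() wrapper and the "media.player" service name are only illustrative) showing how native code typically uses the BpServiceManager obtained this way:

```C++
#include <binder/IServiceManager.h>
#include <binder/ProcessState.h>
#include <utils/String16.h>

using namespace android;

int main()
{
    // Returns the BpServiceManager(BpBinder(0)) singleton described above.
    sp<IServiceManager> sm = defaultServiceManager();

    // Look up a service registered by some other process.
    sp<IBinder> binder = sm->getService(String16("media.player"));

    // Every call on the proxy goes through BpBinder::transact and from
    // there into the driver via ioctl(fd, BINDER_WRITE_READ, ...).
    return binder != NULL ? 0 : 1;
}
```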

3 Understanding the Stub and Proxy Mechanism via AIDL

Android offers many IPC mechanisms; when real-time, precise inter-process communication is needed, AIDL is the usual choice:

```java
interface IMyTransport {
    void send(String value);
}
```

After creating the .aidl interface, rebuild the project once and the corresponding classes are generated. Using AIDL necessarily involves a Service; for in-process use, onBind can simply return null:

```kotlin
class MyService : Service() {
    override fun onBind(intent: Intent?): IBinder? {
        return null
    }
}
```

For cross-process communication, onBind has to return a Binder object, and it is this object that carries the IPC capability:

```kotlin
class MyService : Service() {
    override fun onBind(intent: Intent?): IBinder? {
        return MyBinder()
    }

    inner class MyBinder : IMyTransport.Stub() {
        override fun send(value: String?) {
        }
    }

}
```

Since the goal is cross-process communication, the client must also hold the same .aidl file with an identical package name, so a copy of it has to be placed in the client project.
