Re: [PATCH 1/3] soc: qcom: Introduce Protection Domain Restart helpers

From: sibis
Date: Tue Nov 19 2019 - 05:18:42 EST


Hey Bjorn,
Thanks for taking the time to
review the series :)

On 2019-11-19 12:10, Bjorn Andersson wrote:
On Mon 18 Nov 06:27 PST 2019, Sibi Sankar wrote:
diff --git a/drivers/soc/qcom/pdr_interface.c b/drivers/soc/qcom/pdr_interface.c
[..]
+static void pdr_indack_work(struct work_struct *work)
+{
+ struct pdr_handle *pdr = container_of(work, struct pdr_handle,
+ indack_work);
+ struct pdr_list_node *ind, *tmp;
+ struct pdr_service *pds;
+
+ list_for_each_entry_safe(ind, tmp, &pdr->indack_list, node) {
+ pds = ind->pds;
+ pdr_send_indack_msg(pdr, pds, ind->transaction_id);

So when we et a ind_cb with the new status, we need to send an ack
request, which will result in a response, just to confirm that we got
the event?

Seems like we should fix the qmi code to make it possible to send a
request from the indication handler and then we could simply ignore the

yeah maybe having a provision
to send custom requests back on
indication would be the way
to go. Not all indication need
to be services with requests.

response. Or do we need to not pdr->status() until we get the response
for some reason?

adsp waits on the ack response for
a fixed duration and seems to throw
a fatal err is the ack is not
serviced. Hence holding back pd->status
till we service the ack here.



Regardless, I'm fine with scheduling this for now...

+ pdr->status(pdr, pds);
+ list_del(&ind->node);
+ kfree(ind);
+ }
+}
+
+static void pdr_servreg_ind_cb(struct qmi_handle *qmi,
+ struct sockaddr_qrtr *sq,
+ struct qmi_txn *txn, const void *data)
+{
+ struct pdr_handle *pdr = container_of(qmi, struct pdr_handle,
+ servreg_client);
+ const struct servreg_state_updated_ind *ind_msg = data;
+ struct pdr_list_node *ind;
+ struct pdr_service *pds;
+
+ if (!ind_msg || !ind_msg->service_path ||
+ strlen(ind_msg->service_path) > (SERVREG_NAME_LENGTH + 1))
+ return;
+
+ list_for_each_entry(pds, &pdr->lookups, node) {
+ if (!strcmp(pds->service_path, ind_msg->service_path))
+ goto found;
+ }
+ return;
+
+found:
+ pds->state = ind_msg->curr_state;
+
+ ind = kzalloc(sizeof(*ind), GFP_KERNEL);
+ if (!ind)
+ return;
+
+ pr_info("PDR: Indication received from %s, state: 0x%x, trans-id: %d\n",
+ ind_msg->service_path, ind_msg->curr_state,
+ ind_msg->transaction_id);
+
+ ind->transaction_id = ind_msg->transaction_id;
+ ind->pds = pds;
+
+ mutex_lock(&pdr->list_lock);
+ list_add_tail(&ind->node, &pdr->indack_list);
+ mutex_unlock(&pdr->list_lock);
+
+ queue_work(pdr->indack_wq, &pdr->indack_work);
+}
+
+static struct qmi_msg_handler qmi_indication_handler[] = {
+ {
+ .type = QMI_INDICATION,
+ .msg_id = SERVREG_STATE_UPDATED_IND_ID,
+ .ei = servreg_state_updated_ind_ei,
+ .decoded_size = sizeof(struct servreg_state_updated_ind),
+ .fn = pdr_servreg_ind_cb,
+ },
+ {}
+};
+
+static int pdr_get_domain_list(struct servreg_get_domain_list_req *req,
+ struct servreg_get_domain_list_resp *resp,
+ struct pdr_handle *pdr)
+{
+ struct qmi_txn txn;
+ int ret;
+
+ ret = qmi_txn_init(&pdr->servloc_client, &txn,
+ servreg_get_domain_list_resp_ei, resp);
+ if (ret < 0)
+ return ret;
+
+ ret = qmi_send_request(&pdr->servloc_client,
+ &pdr->servloc_addr,
+ &txn, SERVREG_GET_DOMAIN_LIST_REQ,
+ SERVREG_GET_DOMAIN_LIST_REQ_MAX_LEN,
+ servreg_get_domain_list_req_ei,
+ req);
+ if (ret < 0) {
+ qmi_txn_cancel(&txn);
+ return ret;
+ }
+
+ ret = qmi_txn_wait(&txn, 5 * HZ);
+ if (ret < 0) {
+ pr_err("PDR: %s get domain list txn wait failed: %d\n",
+ req->service_name, ret);
+ return ret;
+ }
+
+ /* Check the response */
+ if (resp->resp.result != QMI_RESULT_SUCCESS_V01) {
+ pr_err("PDR: %s get domain list failed: 0x%x\n",
+ req->service_name, resp->resp.error);
+ return -EREMOTEIO;
+ }
+
+ return ret;

ret here will be the number of bytes decoded, but you really only care
about if this was an error or not. So I would suggest that you just
return 0 here.

yeah will return 0


+}
+
+static int pdr_locate_service(struct pdr_handle *pdr, struct pdr_service *pds)
+{
+ struct servreg_get_domain_list_resp *resp = NULL;
+ struct servreg_get_domain_list_req req;
+ int db_rev_count = 0, domains_read = 0;
+ struct servreg_location_entry *entry;
+ int ret, i;
+
+ resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ /* Prepare req message */
+ strcpy(req.service_name, pds->service_name);
+ req.domain_offset_valid = true;
+ req.domain_offset = 0;
+
+ do {
+ req.domain_offset = domains_read;
+ ret = pdr_get_domain_list(&req, resp, pdr);
+ if (ret < 0)
+ goto out;
+
+ if (!domains_read)
+ db_rev_count = resp->db_rev_count;
+
+ if (db_rev_count != resp->db_rev_count) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ for (i = domains_read; i < resp->domain_list_len; i++) {
+ entry = &resp->domain_list[i];
+
+ if (strlen(entry->name) > (SERVREG_NAME_LENGTH + 1))

In the event that the incoming string isn't NUL-terminated this will run
off the array.

if (strnlen(entry->name, SERVREG_NAME_LENGTH + 1) == SERVREG_NAME_LENGTH + 1)

or perhaps, relying on sizeof instead of duplicating the knowledge that
it is SERVREG_NAME_LENGTH + 1:

if (strnlen(entry->name, sizeof(entry->name)) == sizeof(entry->name))

yeah I'll use ^^ then or maybe switch
to a strncpy to further simplify things.


+ continue;
+
+ if (!strcmp(entry->name, pds->service_path)) {
+ pds->service_data_valid = entry->service_data_valid;
+ pds->service_data = entry->service_data;
+ pds->instance = entry->instance;
+ goto out;
+ }
+ }
+
+ /* Update ret to indicate that the service is not yet found */
+ ret = -EINVAL;
+
+ /* Always read total_domains from the response msg */
+ if (resp->domain_list_len > resp->total_domains)

Double space after '>'

thanks for catching this


+ resp->domain_list_len = resp->total_domains;
+
+ domains_read += resp->domain_list_len;
+ } while (domains_read < resp->total_domains);
+out:
+ kfree(resp);
+ return ret;
+}
+
+static void pdr_servloc_work(struct work_struct *work)
+{
+ struct pdr_handle *pdr = container_of(work, struct pdr_handle,
+ servloc_work);
+ struct pdr_list_node *servloc, *tmp;
+ struct pdr_service *pds;
+ int ret;
+
+ list_for_each_entry_safe(servloc, tmp, &pdr->servloc_list, node) {
+ pds = servloc->pds;
+
+ /* wait for PD Mapper to come up */
+ ret = wait_for_completion_timeout(&pdr->locator_available, 10 * HZ);

Afaict this means that we will only look for the locator during the 10
seconds that follows a pdr_add_lookup().

How about changing this so that you bail before the loop if the locator
hasn't showed up yet and schedule this worker when the locator is
registered?

yes makes sense, will do that.
But by doing this the client
will have to track timeouts
for lookups.


+ if (!ret) {
+ pr_err("PDR: SERVICE LOCATOR service wait failed\n");
+ ret = -ETIMEDOUT;
+ goto err;
+ }
+
+ ret = pdr_locate_service(pdr, pds);
+ if (ret < 0) {
+ pr_err("PDR: service lookup for %s failed: %d\n",
+ pds->service_name, ret);
+ goto err;
+ }
+
+ qmi_add_lookup(&pdr->servreg_client, pds->service, 1,
+ pds->instance);
+err:
+ list_del(&servloc->node);
+ kfree(servloc);
+
+ /* cleanup pds on error */
+ if (ret < 0) {
+ pds->state = SERVREG_LOCATOR_ERR;
+ pdr->status(pdr, pds);
+ list_del(&pds->node);
+ kfree(pds);
+ }
+ }
+}
[..]
+int pdr_add_lookup(struct pdr_handle *pdr, const char *service_name,
+ const char *service_path)
+{
+ struct pdr_service *pds, *pds_iter, *tmp;
+ struct pdr_list_node *servloc;
+ int ret;
+
+ if (!service_name || strlen(service_name) > (SERVREG_NAME_LENGTH + 1) ||
+ !service_path || strlen(service_path) > (SERVREG_NAME_LENGTH + 1))

When strlen(x) == SERVREG_NAME_LENGTH + 1 your strcpy below would write
SERVREG_NAME_LENGTH + 2 bytes to service_name and service_path, so drop
the + 1 from the comparisons.

yes will do that


+ return -EINVAL;
+
+ servloc = kzalloc(sizeof(*servloc), GFP_KERNEL);
+ if (!servloc)
+ return -ENOMEM;
+
+ pds = kzalloc(sizeof(*pds), GFP_KERNEL);
+ if (!pds) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ pds->service = SERVREG_NOTIFIER_SERVICE;
+ strcpy(pds->service_name, service_name);
+ strcpy(pds->service_path, service_path);
[..]
+int pdr_restart_pd(struct pdr_handle *pdr, const char *service_path)
+{
+ struct servreg_restart_pd_req req;
+ struct servreg_restart_pd_resp resp;
+ struct pdr_service *pds = NULL, *pds_iter, *tmp;
+ struct qmi_txn txn;
+ int ret;
+
+ if (!service_path || strlen(service_path) > (SERVREG_NAME_LENGTH + 1))

As above, drop the + 1

ditto


+ return -EINVAL;
+
[..]
+int pdr_handle_init(struct pdr_handle *pdr,
+ int (*status)(struct pdr_handle *pdr,
+ struct pdr_service *pds))
+{
[..]
+ pdr->servreg_wq = create_singlethread_workqueue("pdr_servreg_wq");
+ if (!pdr->servreg_wq)
+ return -ENOMEM;
+
+ pdr->indack_wq = alloc_ordered_workqueue("pdr_indack_wq", WQ_HIGHPRI);

The two workqueues means that we should be able to call pdr->status()
rom two concurrent contexts, I don't think our clients will expect that.


would creating another ordered wq to
relay all the pd->status make sense?

+ if (!pdr->indack_wq) {
+ ret = -ENOMEM;
+ goto destroy_servreg;
+ }
+

Regards,
Bjorn