/* json-path.h - JSONPath implementation * * This file is part of JSON-GLib * Copyright © 2011 Intel Corp. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library. If not, see . * * Author: * Emmanuele Bassi */ /** * JsonPath: * * `JsonPath` is a simple class implementing the JSONPath syntax for extracting * data out of a JSON tree. * * While the semantics of the JSONPath expressions are heavily borrowed by the * XPath specification for XML, the syntax follows the ECMAScript origins of * JSON. * * Once a `JsonPath` instance has been created, it has to compile a JSONPath * expression using [method@Json.Path.compile] before being able to match it to * a JSON tree; the same `JsonPath` instance can be used to match multiple JSON * trees. It it also possible to compile a new JSONPath expression using the * same `JsonPath` instance; the previous expression will be discarded only if * the compilation of the new expression is successful. * * The simple convenience function [func@Json.Path.query] can be used for * one-off matching. * * ## Syntax of the JSONPath expressions * * A JSONPath expression is composed by path indices and operators. * Each path index can either be a member name or an element index inside * a JSON tree. A JSONPath expression must start with the `$` operator; each * path index is separated using either the dot notation or the bracket * notation, e.g.: * * ``` * // dot notation * $.store.book[0].title * * // bracket notation * $['store']['book'][0]['title'] * ``` * * The available operators are: * * * The `$` character represents the root node of the JSON tree, and * matches the entire document. * * * Child nodes can either be matched using `.` or `[]`. For instance, * both `$.store.book` and `$['store']['book']` match the contents of * the book member of the store object. * * * Child nodes can be reached without specifying the whole tree structure * through the recursive descent operator, or `..`. For instance, * `$..author` matches all author member in every object. * * * Child nodes can grouped through the wildcard operator, or `*`. For * instance, `$.store.book[*].author` matches all author members of any * object element contained in the book array of the store object. * * * Element nodes can be accessed using their index (starting from zero) * in the subscript operator `[]`. For instance, `$.store.book[0]` matches * the first element of the book array of the store object. * * * Subsets of element nodes can be accessed using the set notation * operator `[i,j,...]`. For instance, `$.store.book[0,2]` matches the * elements 0 and 2 (the first and third) of the book array of the store * object. * * * Slices of element nodes can be accessed using the slice notation * operation `[start:end:step]`. If start is omitted, the starting index * of the slice is implied to be zero; if end is omitted, the ending index * of the slice is implied to be the length of the array; if step is * omitted, the step of the slice is implied to be 1. For instance, * `$.store.book[:2]` matches the first two elements of the book array * of the store object. * * More information about JSONPath is available on Stefan Gössner's * [JSONPath website](http://goessner.net/articles/JsonPath/). * * ## Example of JSONPath matches * * The following example shows some of the results of using `JsonPath` * on a JSON tree. We use the following JSON description of a bookstore: * * ```json * { "store": { * "book": [ * { "category": "reference", "author": "Nigel Rees", * "title": "Sayings of the Century", "price": "8.95" }, * { "category": "fiction", "author": "Evelyn Waugh", * "title": "Sword of Honour", "price": "12.99" }, * { "category": "fiction", "author": "Herman Melville", * "title": "Moby Dick", "isbn": "0-553-21311-3", * "price": "8.99" }, * { "category": "fiction", "author": "J. R. R. Tolkien", * "title": "The Lord of the Rings", "isbn": "0-395-19395-8", * "price": "22.99" } * ], * "bicycle": { "color": "red", "price": "19.95" } * } * } * ``` * * We can parse the JSON using [class@Json.Parser]: * * ```c * JsonParser *parser = json_parser_new (); * json_parser_load_from_data (parser, json_data, -1, NULL); * ``` * * If we run the following code: * * ```c * JsonNode *result; * JsonPath *path = json_path_new (); * json_path_compile (path, "$.store..author", NULL); * result = json_path_match (path, json_parser_get_root (parser)); * ``` * * The `result` node will contain an array with all values of the * author member of the objects in the JSON tree. If we use a * [class@Json.Generator] to convert the `result` node to a string * and print it: * * ```c * JsonGenerator *generator = json_generator_new (); * json_generator_set_root (generator, result); * char *str = json_generator_to_data (generator, NULL); * g_print ("Results: %s\n", str); * ``` * * The output will be: * * ```json * ["Nigel Rees","Evelyn Waugh","Herman Melville","J. R. R. Tolkien"] * ``` * * Since: 0.14 */ #include "config.h" #include #include #include "json-path.h" #include "json-debug.h" #include "json-types-private.h" typedef enum { JSON_PATH_NODE_ROOT, JSON_PATH_NODE_CHILD_MEMBER, JSON_PATH_NODE_CHILD_ELEMENT, JSON_PATH_NODE_RECURSIVE_DESCENT, JSON_PATH_NODE_WILDCARD_MEMBER, JSON_PATH_NODE_WILDCARD_ELEMENT, JSON_PATH_NODE_ELEMENT_SET, JSON_PATH_NODE_ELEMENT_SLICE } PathNodeType; typedef struct _PathNode PathNode; struct _JsonPath { GObject parent_instance; /* the compiled path */ GList *nodes; guint is_compiled : 1; }; struct _JsonPathClass { GObjectClass parent_class; }; struct _PathNode { PathNodeType node_type; union { /* JSON_PATH_NODE_CHILD_ELEMENT */ guint element_index; /* JSON_PATH_NODE_CHILD_MEMBER */ char *member_name; /* JSON_PATH_NODE_ELEMENT_SET */ struct { int n_indices; int *indices; } set; /* JSON_PATH_NODE_ELEMENT_SLICE */ struct { int start, end, step; } slice; } data; }; G_DEFINE_QUARK (json-path-error-quark, json_path_error) G_DEFINE_TYPE (JsonPath, json_path, G_TYPE_OBJECT) static void path_node_free (gpointer data) { if (data != NULL) { PathNode *node = data; switch (node->node_type) { case JSON_PATH_NODE_CHILD_MEMBER: g_free (node->data.member_name); break; case JSON_PATH_NODE_ELEMENT_SET: g_free (node->data.set.indices); break; default: break; } g_free (node); } } static void json_path_finalize (GObject *gobject) { JsonPath *self = JSON_PATH (gobject); g_list_free_full (self->nodes, path_node_free); G_OBJECT_CLASS (json_path_parent_class)->finalize (gobject); } static void json_path_class_init (JsonPathClass *klass) { G_OBJECT_CLASS (klass)->finalize = json_path_finalize; } static void json_path_init (JsonPath *self G_GNUC_UNUSED) { } /** * json_path_new: * * Creates a new `JsonPath` instance. * * Once created, the `JsonPath` object should be used with * [method@Json.Path.compile] and [method@Json.Path.match]. * * Return value: (transfer full): the newly created path * * Since: 0.14 */ JsonPath * json_path_new (void) { return g_object_new (JSON_TYPE_PATH, NULL); } #ifdef JSON_ENABLE_DEBUG /* used as the function for a g_list_foreach() on a list of PathNode; needs * a GString as the payload to build the output string */ static void json_path_foreach_print (gpointer data, gpointer user_data) { PathNode *cur_node = data; GString *buf = user_data; switch (cur_node->node_type) { case JSON_PATH_NODE_ROOT: g_string_append (buf, "data.member_name); break; case JSON_PATH_NODE_CHILD_ELEMENT: g_string_append_printf (buf, "data.element_index); break; case JSON_PATH_NODE_RECURSIVE_DESCENT: g_string_append (buf, "data.set.n_indices - 1; i++) g_string_append_printf (buf, "'%d', ", cur_node->data.set.indices[i]); g_string_append_printf (buf, "'%d'", cur_node->data.set.indices[i]); } break; case JSON_PATH_NODE_ELEMENT_SLICE: g_string_append_printf (buf, "data.slice.start, cur_node->data.slice.end, cur_node->data.slice.step); break; default: g_string_append (buf, ""); } #endif /* JSON_ENABLE_DEBUG */ /** * json_path_compile: * @path: a path * @expression: a JSONPath expression * @error: return location for a #GError, or %NULL * * Validates and decomposes the given expression. * * A JSONPath expression must be compiled before calling * [method@Json.Path.match]. * * Return value: `TRUE` if the compilation was successful, and `FALSE` * otherwise * * Since: 0.14 */ gboolean json_path_compile (JsonPath *path, const char *expression, GError **error) { const char *p, *end_p; PathNode *root = NULL; GList *nodes = NULL; g_return_val_if_fail (expression != NULL, FALSE); p = expression; while (*p != '\0') { switch (*p) { case '$': { PathNode *node; if (root != NULL) { g_set_error_literal (error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, _("Only one root node is allowed in a JSONPath expression")); return FALSE; } if (!(*(p + 1) == '.' || *(p + 1) == '[' || *(p + 1) == '\0')) { g_set_error (error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, /* translators: the %c is the invalid character */ _("Root node followed by invalid character “%c”"), *(p + 1)); return FALSE; } node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_ROOT; root = node; nodes = g_list_prepend (NULL, root); } break; case '.': case '[': { PathNode *node = NULL; if (*p == '.' && *(p + 1) == '.') { node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_RECURSIVE_DESCENT; } else if (*p == '.' && *(p + 1) == '*') { node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_WILDCARD_MEMBER; p += 1; } else if (*p == '.') { end_p = p + 1; while (!(*end_p == '.' || *end_p == '[' || *end_p == '\0')) end_p += 1; if (end_p == p + 1) { g_set_error_literal (error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, _("Missing member name or wildcard after . character")); goto fail; } node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_CHILD_MEMBER; node->data.member_name = g_strndup (p + 1, end_p - p - 1); p = end_p - 1; } else if (*p == '[' && *(p + 1) == '\'') { if (*(p + 2) == '*' && *(p + 3) == '\'' && *(p + 4) == ']') { node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_WILDCARD_MEMBER; p += 4; } else { node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_CHILD_MEMBER; end_p = strchr (p + 2, '\''); node->data.member_name = g_strndup (p + 2, end_p - p - 2); p = end_p + 1; } } else if (*p == '[' && *(p + 1) == '*' && *(p + 2) == ']') { node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_WILDCARD_ELEMENT; p += 1; } else if (*p == '[') { int sign = 1; int idx; end_p = p + 1; if (*end_p == '-') { sign = -1; end_p += 1; } /* slice with missing start */ if (*end_p == ':') { int slice_end = g_ascii_strtoll (end_p + 1, (char **) &end_p, 10) * sign; int slice_step = 1; if (*end_p == ':') { end_p += 1; if (*end_p == '-') { sign = -1; end_p += 1; } else sign = 1; slice_step = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign; if (*end_p != ']') { g_set_error (error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, _("Malformed slice expression “%*s”"), (int)(end_p - p), p + 1); goto fail; } } node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_ELEMENT_SLICE; node->data.slice.start = 0; node->data.slice.end = slice_end; node->data.slice.step = slice_step; nodes = g_list_prepend (nodes, node); p = end_p; break; } idx = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign; if (*end_p == ',') { GArray *indices = g_array_new (FALSE, TRUE, sizeof (int)); g_array_append_val (indices, idx); while (*end_p != ']') { end_p += 1; if (*end_p == '-') { sign = -1; end_p += 1; } else sign = 1; idx = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign; if (!(*end_p == ',' || *end_p == ']')) { g_array_unref (indices); g_set_error (error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, _("Invalid set definition “%*s”"), (int)(end_p - p), p + 1); goto fail; } g_array_append_val (indices, idx); } node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_ELEMENT_SET; node->data.set.n_indices = indices->len; node->data.set.indices = (int *) g_array_free (indices, FALSE); nodes = g_list_prepend (nodes, node); p = end_p; break; } else if (*end_p == ':') { int slice_start = idx; int slice_end = 0; int slice_step = 1; end_p += 1; if (*end_p == '-') { sign = -1; end_p += 1; } else sign = 1; slice_end = g_ascii_strtoll (end_p, (char **) &end_p, 10) * sign; if (*end_p == ':') { end_p += 1; if (*end_p == '-') { sign = -1; end_p += 1; } else sign = 1; slice_step = g_ascii_strtoll (end_p + 1, (char **) &end_p, 10) * sign; } if (*end_p != ']') { g_set_error (error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, _("Invalid slice definition “%*s”"), (int)(end_p - p), p + 1); goto fail; } node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_ELEMENT_SLICE; node->data.slice.start = slice_start; node->data.slice.end = slice_end; node->data.slice.step = slice_step; nodes = g_list_prepend (nodes, node); p = end_p; break; } else if (*end_p == ']') { node = g_new0 (PathNode, 1); node->node_type = JSON_PATH_NODE_CHILD_ELEMENT; node->data.element_index = idx; nodes = g_list_prepend (nodes, node); p = end_p; break; } else { g_set_error (error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, _("Invalid array index definition “%*s”"), (int)(end_p - p), p + 1); goto fail; } } else break; if (node != NULL) nodes = g_list_prepend (nodes, node); } break; default: if (nodes == NULL) { g_set_error(error, JSON_PATH_ERROR, JSON_PATH_ERROR_INVALID_QUERY, _("Invalid first character “%c”"), *p); return FALSE; } break; } p += 1; } nodes = g_list_reverse (nodes); #ifdef JSON_ENABLE_DEBUG if (JSON_HAS_DEBUG (PATH)) { GString *buf = g_string_new (NULL); g_list_foreach (nodes, json_path_foreach_print, buf); g_message ("[PATH] " G_STRLOC ": expression '%s' => '%s'", expression, buf->str); g_string_free (buf, TRUE); } #endif /* JSON_ENABLE_DEBUG */ g_list_free_full (path->nodes, path_node_free); path->nodes = nodes; path->is_compiled = (path->nodes != NULL); return path->nodes != NULL; fail: g_list_free_full (nodes, path_node_free); return FALSE; } static void walk_path_node (GList *path, JsonNode *root, JsonArray *results) { PathNode *node = path->data; switch (node->node_type) { case JSON_PATH_NODE_ROOT: if (path->next != NULL) walk_path_node (path->next, root, results); else json_array_add_element (results, json_node_copy (root)); break; case JSON_PATH_NODE_CHILD_MEMBER: if (JSON_NODE_HOLDS_OBJECT (root)) { JsonObject *object = json_node_get_object (root); if (json_object_has_member (object, node->data.member_name)) { JsonNode *member = json_object_get_member (object, node->data.member_name); if (path->next == NULL) { JSON_NOTE (PATH, "end of path at member '%s'", node->data.member_name); json_array_add_element (results, json_node_copy (member)); } else walk_path_node (path->next, member, results); } } break; case JSON_PATH_NODE_CHILD_ELEMENT: if (JSON_NODE_HOLDS_ARRAY (root)) { JsonArray *array = json_node_get_array (root); if (json_array_get_length (array) >= node->data.element_index) { JsonNode *element = json_array_get_element (array, node->data.element_index); if (path->next == NULL) { JSON_NOTE (PATH, "end of path at element '%d'", node->data.element_index); json_array_add_element (results, json_node_copy (element)); } else walk_path_node (path->next, element, results); } } break; case JSON_PATH_NODE_RECURSIVE_DESCENT: { PathNode *tmp = path->next->data; switch (json_node_get_node_type (root)) { case JSON_NODE_OBJECT: { JsonObject *object = json_node_get_object (root); GQueue *members = json_object_get_members_internal (object); GList *l; for (l = members->head; l != NULL; l = l->next) { JsonNode *m = json_object_get_member (object, l->data); if (tmp->node_type == JSON_PATH_NODE_CHILD_MEMBER && strcmp (tmp->data.member_name, l->data) == 0) { JSON_NOTE (PATH, "entering '%s'", tmp->data.member_name); walk_path_node (path->next, root, results); } else { JSON_NOTE (PATH, "recursing into '%s'", (char *) l->data); walk_path_node (path, m, results); } } } break; case JSON_NODE_ARRAY: { JsonArray *array = json_node_get_array (root); GList *members, *l; guint i; members = json_array_get_elements (array); for (l = members, i = 0; l != NULL; l = l->next, i += 1) { JsonNode *m = l->data; if (tmp->node_type == JSON_PATH_NODE_CHILD_ELEMENT && tmp->data.element_index == i) { JSON_NOTE (PATH, "entering '%d'", tmp->data.element_index); walk_path_node (path->next, root, results); } else { JSON_NOTE (PATH, "recursing into '%d'", i); walk_path_node (path, m, results); } } g_list_free (members); } break; default: break; } } break; case JSON_PATH_NODE_WILDCARD_MEMBER: if (JSON_NODE_HOLDS_OBJECT (root)) { JsonObject *object = json_node_get_object (root); GQueue *members = json_object_get_members_internal (object); GList *l; for (l = members->head; l != NULL; l = l->next) { JsonNode *member = json_object_get_member (object, l->data); if (path->next != NULL) walk_path_node (path->next, member, results); else { JSON_NOTE (PATH, "glob match member '%s'", (char *) l->data); json_array_add_element (results, json_node_copy (member)); } } } else json_array_add_element (results, json_node_copy (root)); break; case JSON_PATH_NODE_WILDCARD_ELEMENT: if (JSON_NODE_HOLDS_ARRAY (root)) { JsonArray *array = json_node_get_array (root); GList *elements, *l; int i; elements = json_array_get_elements (array); for (l = elements, i = 0; l != NULL; l = l->next, i += 1) { JsonNode *element = l->data; if (path->next != NULL) walk_path_node (path->next, element, results); else { JSON_NOTE (PATH, "glob match element '%d'", i); json_array_add_element (results, json_node_copy (element)); } } g_list_free (elements); } else json_array_add_element (results, json_node_copy (root)); break; case JSON_PATH_NODE_ELEMENT_SET: if (JSON_NODE_HOLDS_ARRAY (root)) { JsonArray *array = json_node_get_array (root); int i; for (i = 0; i < node->data.set.n_indices; i += 1) { int idx = node->data.set.indices[i]; JsonNode *element = json_array_get_element (array, idx); if (path->next != NULL) walk_path_node (path->next, element, results); else { JSON_NOTE (PATH, "set element '%d'", idx); json_array_add_element (results, json_node_copy (element)); } } } break; case JSON_PATH_NODE_ELEMENT_SLICE: if (JSON_NODE_HOLDS_ARRAY (root)) { JsonArray *array = json_node_get_array (root); int i, start, end; if (node->data.slice.start < 0) { start = json_array_get_length (array) + node->data.slice.start; end = json_array_get_length (array) + node->data.slice.end; } else { start = node->data.slice.start; end = node->data.slice.end; } for (i = start; i < end; i += node->data.slice.step) { JsonNode *element = json_array_get_element (array, i); if (path->next != NULL) walk_path_node (path->next, element, results); else { JSON_NOTE (PATH, "slice element '%d'", i); json_array_add_element (results, json_node_copy (element)); } } } break; default: break; } } /** * json_path_match: * @path: a compiled path * @root: the root node of the JSON data to match * * Matches the JSON tree pointed by `root` using the expression compiled * into the `JsonPath`. * * The nodes matching the expression will be copied into an array. * * Return value: (transfer full): a newly-created node of type * `JSON_NODE_ARRAY` containing the array of matching nodes * * Since: 0.14 */ JsonNode * json_path_match (JsonPath *path, JsonNode *root) { JsonArray *results; JsonNode *retval; g_return_val_if_fail (JSON_IS_PATH (path), NULL); g_return_val_if_fail (path->is_compiled, NULL); g_return_val_if_fail (root != NULL, NULL); results = json_array_new (); walk_path_node (path->nodes, root, results); retval = json_node_new (JSON_NODE_ARRAY); json_node_take_array (retval, results); return retval; } /** * json_path_query: * @expression: a JSONPath expression * @root: the root of a JSON tree * @error: return location for a #GError, or %NULL * * Queries a JSON tree using a JSONPath expression. * * This function is a simple wrapper around [ctor@Json.Path.new], * [method@Json.Path.compile], and [method@Json.Path.match]. It implicitly * creates a `JsonPath` instance, compiles the given expression and matches * it against the JSON tree pointed by `root`. * * Return value: (transfer full): a newly-created node of type * `JSON_NODE_ARRAY` containing the array of matching nodes * * Since: 0.14 */ JsonNode * json_path_query (const char *expression, JsonNode *root, GError **error) { JsonPath *path = json_path_new (); JsonNode *retval; if (!json_path_compile (path, expression, error)) { g_object_unref (path); return NULL; } retval = json_path_match (path, root); g_object_unref (path); return retval; }