Even Wikipedia only offers a negative definition:
«A database that focuses on data that is modeled in means other than tabular relations»
Repository at github.com/dbu/postgres-json
CREATE TABLE auction_jsonb (
id SERIAL NOT NULL,
title VARCHAR(255) NOT NULL,
current_price INT DEFAULT NULL,
item JSONB NOT NULL,
PRIMARY KEY(id)
);
| JSON: Untyped, stored raw | JSONB: Stored structured |
|---|---|
|
|
=> Use JSONB unless you rely on non-standard behaviour.
item->'author' = '"David"'
or, 2.5 times faster
item->>'author' = 'David'
|
> The JSON information |
>> Extract primitive type |
SELECT COUNT(*)
FROM auction_jsonb
WHERE item->>'author' = 'David';
| Rows | json | jsonb |
|---|---|---|
| 10k | 136 | 31 |
| 100k | 319 | 46 |
| 1M | 1'280 | 157 |
| 10M | 15'505 | 2'768 |
Query time in milliseconds
CREATE INDEX json_author
ON auction_json_indexed ((item->>'author'));
| Rows | json | json indexed | jsonb | jsonb indexed |
|---|---|---|---|---|
| 10k | 136 | 25 | 31 | 20 |
| 100k | 319 | 12 | 46 | 13 |
| 1M | 1'280 | 3 | 157 | 3 |
| 10M | 15'505 | 7 | 2'768 | 6 |
Query time in milliseconds
At 10M rows, postgres stopped using the index and went for full table scan
-- Value <= 1.15 makes postgres use the index
ALTER TABLESPACE pg_default
SET (random_page_cost=1.1);
Thanks Frank Heikens on stackoverflow for hinting random_page_cost
CREATE TABLE auction_generated_fields (
id SERIAL NOT NULL,
author VARCHAR(255) generated always
as (item->>'author') stored,
item JSONB NOT NULL, PRIMARY KEY(id));
| Rows | jsonb indexed | generated | generated indexed |
|---|---|---|---|
| 10k | 20 | 21 | 18 |
| 100k | 13 | 30 | 10 |
| 1M | 3 | 88 | 3 |
| 10M | 6 | 786 | 4 |
Query time in milliseconds
CREATE FUNCTION text_to_timestamp(text) RETURNS TIMESTAMP
LANGUAGE sql IMMUTABLE AS
$$
SELECT CASE
WHEN $1 ~ '^\d{4}-\d{2}-\d{2}[ |T]\d{2}:\d{2}:\d{2}(\.\d+)?(\+00:00)?$'
THEN CAST($1 AS timestamp without time zone)
END
$$;
CREATE TABLE auction_generated_fields (
id SERIAL NOT NULL,
start_date TIMESTAMP(0) generated always
AS (text_to_timestamp(item->>'startDate')) stored,
item JSONB NOT NULL, PRIMARY KEY(id));
item->typeJSON_QUERY(item, '$.author')item #> '{type}'item['type']No sharing of indexes.
=> Decide which construct you use
contains
item @> '{"author": "David"}'
key exists
item ? 'author'
jsonb_exists(item, 'author')
any key exists
item ?| array['author','foo']
jsonb_exists_any(item, array['a..', 'f..']
all keys exist
item ?& array['author','title']
jsonb_exists_all(item, array['a..', 'f..']
CREATE INDEX auction_json_gin_idx
ON auction_jsonb_gin USING GIN (item);
SELECT COUNT(*)
FROM auction_jsonb_gin
WHERE item @> '{"author": "David"}';
| Rows | ->> indexed | @> no index | @> with index | @> with GIN |
|---|---|---|---|---|
| 10k | 20 | 5 | 5 | 5 |
| 100k | 13 | 20 | 19 | 5 |
| 1M | 3 | 144 | 149 | 13 |
| 10M | 6 | 1'239 | 1'235 | 76 |
Query time in milliseconds
Speed up arbitrary JSON contains queries, but not other queries
Data distribution of the indexed column makes a huge difference
| Variants | json | json idx | jsonb | jsonb idx | GIN >> | GIN @ | gen | gen idx |
|---|---|---|---|---|---|---|---|---|
| 10 | 12'372 | 784 | 1'895 | 676 | 1'882 | 1'364 | 1'030 | 44 |
| 1'000 | 12'424 | 6 | 1'583 | 6 | 1'260 | 58 | 931 | 3 |
| 100'000 | 13'197 | 3 | 1'710 | 3 | 1'501 | 5 | 838 | 3 |
Response times in milliseconds, table with 10 million rows
Jsonb, indexes and generated fields do not come for free
| Rows | json | json idx | jsonb | jsonb idx | GIN | gen | gen idx |
|---|---|---|---|---|---|---|---|
| 10k | 60 | 90 | 90 | 120 | 130 | 94 | 143 |
| 100k | 550 | 740 | 820 | 990 | 2'110 | 890 | 1'000 |
| 1M | 5'150 | 8'500 | 7'670 | 11'340 | 24'000 | 8'440 | 11'773 |
| 10M | 55'000 | 96'000 | 97'000 | 115'000 | 256'000 | 84'000 | 116'000 |
Inserting dummy rows, duration in milliseconds
Manipulate JSON instead of replacing it completely
-- update, ignore if not exists
UPDATE auction_jsonb
SET item = jsonb_set(item, '{author}', '"value"', false)
WHERE id=42;
-- update or create
UPDATE auction_jsonb
SET item = jsonb_set(item, '{author}', 'value', true)
WHERE id=42;
Postgres is replacing the whole JSON, query is not faster
-- delete attribute
UPDATE auction_jsonb
SET item = item - 'key'
WHERE ID=42;
-- delete attribute alternate
UPDATE auction_jsonb
SET item = jsonb_set_lax(item, '{author}', null, true, 'delete_key')
WHERE ID=42;
-- delete multiple
item - array['key1', 'key2']
| raise_exception | Fail if value is null |
| use_json_null | Set JSON field to null (default) |
| delete_key | Delete the key on null value |
| return_target | Leave previous value when new is null |
set_jsonb_lax(item, '{author}', null, true, 'return_target');
pg_jsonschema / postgres-json-schema)
|| concat array / merge objectsjsonb_build_object('key', 'value', 'k2', 'v2')
#[ORM\Column(type: Types::JSON)]
private array $item;
#[ORM\Column(type: Types::JSON, options: ['jsonb' => true])]
private array $item;
Index can not be defined in Doctrine.
You have to manually add them to the migration.
$this->addSql('CREATE INDEX json_author ON auction_json_indexed ((item->>\'author\'))');
#[ORM\Column(
length: 255,
nullable: true,
insertable: false,
updatable: false,
columnDefinition: "VARCHAR(255) generated always as (item->>'author') stored",
generated: "ALWAYS"
)]
private ?string $author = null;
Note: the column definition is native SQL, not DQL
You can create native queries with Doctrine DBAL
Or use scienta/doctrine-json-functions
# config/packages/doctrine.yaml
doctrine:
orm:
dql:
string_functions:
JSON_GET_TEXT: Scienta\...\Postgresql\JsonGetText
JSONB_CONTAINS: Scienta\...\Postgresql\JsonbContains
Register boolean functions as string and compare with = true
$qb = $this->createQueryBuilder('a');
$qb->select('COUNT(a)')
// Need to qualify with a. in front of item
->where("JSON_GET_TEXT(a.item, 'author') = :author")
->setParameter('author', $authorName)
;
return $qb->getQuery()->getSingleScalarResult();
$qb = $this->createQueryBuilder('a');
$connection = $this->getEntityManager()->getConnection();
// Could not figure out how to pass author as parameter.
// Not sure if escaping is safe,
// needs to be verified for direct user input
$author = str_replace('"', '\"', $authorName);
$author = trim($connection->quote($author), "'");
$qb->select('COUNT(a)')
->where("JSONB_CONTAINS(a.item, '{\"author\": \"$author\"}') = true")
;
return $qb->getQuery()->getSingleScalarResult();
| Rows | jsonb | dbal | dbal, batch 1k | entities |
|---|---|---|---|---|
| 10k | 60 | 4519 | 163 | 6290 |
| 100k | 550 | 49964 | 1587 | 53823 |
See pgdaash.io
json_each(item)
jsonb_each(item)
SELECT *
FROM jsonb_each
((SELECT item FROM auction_jsonb WHERE id=1));