postgresql – Why is OR statement slower than UNION

Database version: Postgresql 12.6

I have a table with 600000 records.

The table has columns:

  • name (varchar)
  • location_type (int) enum values: (1,2,3)
  • ancestry (varchar)

Indexes:

The ancestry column is a way to build a tree where every row has an ancestry containing all parent ids separated by ‘/’

Consider the following example:

id name ancestry
1 root null
5 node ‘1’
12 node ‘1/5’
22 leaf ‘1/5/12’

The following query takes 686 ms to execute:

SELECT * FROM geolocations
WHERE EXISTS (
   SELECT 1 FROM geolocations g2
   WHERE g2.ancestry = 
      CONCAT(geolocations.ancestry, '/', geolocations.id)
)

This query runs in 808 ms seconds:

SELECT * FROM geolocations
WHERE location_type = 2

When combining both queried with an OR it takes around 4 seconds 475 ms to finish if it ever finishes.

SELECT * FROM geolocations
WHERE EXISTS (
   SELECT 1 FROM geolocations g2
   WHERE g2.ancestry = 
      CONCAT(geolocations.ancestry, '/', geolocations.id)
) OR location_type = 2

Explain:

(
  {
    "Plan": {
      "Node Type": "Seq Scan",
      "Parallel Aware": false,
      "Relation Name": "geolocations",
      "Alias": "geolocations",
      "Startup Cost": 0,
      "Total Cost": 2760473.54,
      "Plan Rows": 582910,
      "Plan Width": 68,
      "Filter": "((SubPlan 1) OR (location_type = 2))",
      "Plans": (
        {
          "Node Type": "Index Only Scan",
          "Parent Relationship": "SubPlan",
          "Subplan Name": "SubPlan 1",
          "Parallel Aware": false,
          "Scan Direction": "Forward",
          "Index Name": "index_geolocations_on_ancestry",
          "Relation Name": "geolocations",
          "Alias": "g2",
          "Startup Cost": 0.43,
          "Total Cost": 124.91,
          "Plan Rows": 30,
          "Plan Width": 0,
          "Index Cond": "(ancestry = concat(geolocations.ancestry, '/', geolocations.id))"
        }
      )
    },
    "JIT": {
      "Worker Number": -1,
      "Functions": 8,
      "Options": {
        "Inlining": true,
        "Optimization": true,
        "Expressions": true,
        "Deforming": true
      }
    }
  }
)

While combining them with a union takes 1 sec 916 ms

SELECT * FROM geolocations
WHERE EXISTS (
   SELECT 1 FROM geolocations g2
   WHERE g2.ancestry = 
      CONCAT(geolocations.ancestry, '/', geolocations.id)
) UNION SELECT * FROM geolocations WHERE location_type = 2

Explain

(
  {
    "Plan": {
      "Node Type": "Unique",
      "Parallel Aware": false,
      "Startup Cost": 308693.44,
      "Total Cost": 332506.74,
      "Plan Rows": 865938,
      "Plan Width": 188,
      "Plans": (
        {
          "Node Type": "Sort",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Startup Cost": 308693.44,
          "Total Cost": 310858.29,
          "Plan Rows": 865938,
          "Plan Width": 188,
          "Sort Key": (
            "geolocations.id",
            "geolocations.name",
            "geolocations.location_type",
            "geolocations.pricing",
            "geolocations.ancestry",
            "geolocations.geolocationable_id",
            "geolocations.geolocationable_type",
            "geolocations.created_at",
            "geolocations.updated_at",
            "geolocations.info"
          ),
          "Plans": (
            {
              "Node Type": "Append",
              "Parent Relationship": "Outer",
              "Parallel Aware": false,
              "Startup Cost": 15851.41,
              "Total Cost": 63464.05,
              "Plan Rows": 865938,
              "Plan Width": 188,
              "Subplans Removed": 0,
              "Plans": (
                {
                  "Node Type": "Hash Join",
                  "Parent Relationship": "Member",
                  "Parallel Aware": false,
                  "Join Type": "Inner",
                  "Startup Cost": 15851.41,
                  "Total Cost": 35074.94,
                  "Plan Rows": 299882,
                  "Plan Width": 68,
                  "Inner Unique": true,
                  "Hash Cond": "(concat(geolocations.ancestry, '/', geolocations.id) = (g2.ancestry)::text)",
                  "Plans": (
                    {
                      "Node Type": "Seq Scan",
                      "Parent Relationship": "Outer",
                      "Parallel Aware": false,
                      "Relation Name": "geolocations",
                      "Alias": "geolocations",
                      "Startup Cost": 0,
                      "Total Cost": 13900.63,
                      "Plan Rows": 599763,
                      "Plan Width": 68
                    },
                    {
                      "Node Type": "Hash",
                      "Parent Relationship": "Inner",
                      "Parallel Aware": false,
                      "Startup Cost": 15600.65,
                      "Total Cost": 15600.65,
                      "Plan Rows": 20061,
                      "Plan Width": 12,
                      "Plans": (
                        {
                          "Node Type": "Aggregate",
                          "Strategy": "Hashed",
                          "Partial Mode": "Simple",
                          "Parent Relationship": "Outer",
                          "Parallel Aware": false,
                          "Startup Cost": 15400.04,
                          "Total Cost": 15600.65,
                          "Plan Rows": 20061,
                          "Plan Width": 12,
                          "Group Key": (
                            "(g2.ancestry)::text"
                          ),
                          "Plans": (
                            {
                              "Node Type": "Seq Scan",
                              "Parent Relationship": "Outer",
                              "Parallel Aware": false,
                              "Relation Name": "geolocations",
                              "Alias": "g2",
                              "Startup Cost": 0,
                              "Total Cost": 13900.63,
                              "Plan Rows": 599763,
                              "Plan Width": 12
                            }
                          )
                        }
                      )
                    }
                  )
                },
                {
                  "Node Type": "Seq Scan",
                  "Parent Relationship": "Member",
                  "Parallel Aware": false,
                  "Relation Name": "geolocations",
                  "Alias": "geolocations_1",
                  "Startup Cost": 0,
                  "Total Cost": 15400.04,
                  "Plan Rows": 566056,
                  "Plan Width": 68,
                  "Filter": "(location_type = 2)"
                }
              )
            }
          )
        }
      )
    },
    "JIT": {
      "Worker Number": -1,
      "Functions": 15,
      "Options": {
        "Inlining": false,
        "Optimization": false,
        "Expressions": true,
        "Deforming": true
      }
    }
  }
)

My question is, why does postgresql execute the OR query much slower?