SQL dialect(SQL 方言)¶
Foundry SQL is generally a subset of Spark SQL ↗ with ANSI compliance.
Quick reference¶
Common operations¶
-- Create a table
CREATE TABLE `/path/to/table` (id INT, name STRING);
-- Insert data
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
-- Basic query
SELECT * FROM `/path/to/table` WHERE id = 1;
-- Join tables
SELECT a.*, b.name
FROM `/path/to/table1` a
LEFT JOIN `/path/to/table2` b ON a.id = b.id;
-- Aggregate
SELECT category, COUNT(*), AVG(price)
FROM `/path/to/products`
GROUP BY category
HAVING COUNT(*) > 10;
-- Window function
SELECT name, salary,
RANK() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;
-- CTE (Common Table Expression)
WITH summary AS (
SELECT category, SUM(amount) as total
FROM `/path/to/sales`
GROUP BY category
)
SELECT * FROM summary WHERE total > 1000;
Commonly used functions¶
| Category | Functions |
|---|---|
| Aggregation | count(), sum(), avg(), min(), max(), count(DISTINCT col), collect_set() |
| String | upper(), lower(), trim(), concat(), concat_ws(), contains(), split(), substr() |
| Date/Time | current_date(), current_timestamp(), date_add(), date_diff(), to_date(), year(), month(), day() |
| Math | abs(), round(), ceil(), floor(), pow(), sqrt(), mod() |
| Array | array(), array_contains(), array_size(), array_distinct(), explode(), flatten() |
| Conditional | CASE WHEN ... THEN ... END, coalesce(), nullif(), regexp_like(), regexp_extract() |
| Window | ROW_NUMBER(), RANK(), DENSE_RANK(), LAG(), LEAD() |
Table references¶
-- By path
SELECT * FROM `/path/to/table`;
-- By RID
SELECT * FROM `ri.rid.for.table.1234`;
-- With branch
SELECT * FROM `/path/to/table`.branch_master;
Key syntax notes¶
- Subqueries must be wrapped in parentheses.
- Created tables are Iceberg by default but can be overridden with
USING <format>syntax. - Only Iceberg tables support
INSERT,UPDATE, andDELETE.
Table creation¶
CREATE [ OR REPLACE ] TABLE table_identifier [ ( col_name1 col_type1, ... ) ] [ USING table_format ] [ PARTITIONED BY ( expr [, ...] ) ] [ TBLPROPERTIES ( 'key'='value' [, ...] ) ] [ AS select_statement ]
PARTITIONED BY is only supported with Iceberg tables. TBLPROPERTIES sets Iceberg table properties ↗.
If not specified, the table format will be Iceberg. Valid table formats include:
- Iceberg:
iceberg - Parquet:
parquet - Avro:
avro
-- examples
CREATE TABLE `/path/to/table` AS
SELECT *
FROM `/path/to/table`;
CREATE TABLE `/path/to/table` USING parquet AS
SELECT *
FROM `/path/to/table`;
CREATE TABLE `/path/to/table`
(
id INT,
name STRING
);
CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);
CREATE TABLE `/path/to/table` (id INT, ts TIMESTAMP)
PARTITIONED BY (days(ts));
CREATE TABLE `/path/to/table` (id INT)
TBLPROPERTIES ('write.format.default'='parquet');
CREATE TABLE `/path/to/table`
PARTITIONED BY (id)
AS SELECT * FROM `/path/to/source`;
Table alteration¶
Iceberg tables support data appends with INSERT, UPDATE, and DELETE statements. Non-Iceberg tables do not currently support table alteration.
Insert¶
INSERT [ INTO ] table_identifier { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
-- examples
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
INSERT INTO `/path/to/table` SELECT id, name FROM `/path/to/table2`;
Update¶
UPDATE table_identifier
SET column_name = value [, ...]
[ WHERE condition ]
-- examples
UPDATE `/path/to/table`
SET col = 'new value';
UPDATE `/path/to/table`
SET col = 'new value'
WHERE col = 'old value';
Delete¶
DELETE FROM table_identifier WHERE condition
DELETE FROM `/path/to/table` WHERE col = 'to delete'
Alter metadata¶
Iceberg tables support schema and metadata changes with ALTER TABLE syntax. Refer to the Iceberg Spark DDL documentation ↗ for additional details.
Column operations¶
ALTER TABLE table_identifier ADD COLUMN col_name col_type [COMMENT 'comment']
ALTER TABLE table_identifier ADD COLUMNS (col_name col_type [COMMENT 'comment'], ...)
ALTER TABLE table_identifier DROP COLUMN col_name
ALTER TABLE table_identifier DROP COLUMNS (col_name, ...)
ALTER TABLE table_identifier RENAME COLUMN old_name TO new_name
ALTER TABLE table_identifier ALTER COLUMN col_name TYPE new_type
ALTER TABLE table_identifier ALTER COLUMN col_name COMMENT 'comment'
ALTER TABLE table_identifier ALTER COLUMN col_name SET NOT NULL
ALTER TABLE table_identifier ALTER COLUMN col_name DROP NOT NULL
-- examples
ALTER TABLE `/path/to/table` ADD COLUMN new_col STRING;
ALTER TABLE `/path/to/table` ADD COLUMNS (col1 INT, col2 STRING COMMENT 'description');
ALTER TABLE `/path/to/table` DROP COLUMN old_col;
ALTER TABLE `/path/to/table` RENAME COLUMN old_name TO new_name;
ALTER TABLE `/path/to/table` ALTER COLUMN col2 TYPE BIGINT;
Partition fields¶
ALTER TABLE table_identifier ADD PARTITION FIELD expr [AS alias]
ALTER TABLE table_identifier DROP PARTITION FIELD expr
ALTER TABLE table_identifier REPLACE PARTITION FIELD old_expr WITH new_expr [AS alias]
-- examples
ALTER TABLE `/path/to/table` ADD PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` ADD PARTITION FIELD bucket(16, id) AS id_bucket;
ALTER TABLE `/path/to/table` DROP PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` REPLACE PARTITION FIELD days(ts) WITH hours(ts);
Write ordering¶
ALTER TABLE table_identifier WRITE ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE DISTRIBUTED BY PARTITION [LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]]
ALTER TABLE table_identifier WRITE UNORDERED
-- examples
ALTER TABLE `/path/to/table` WRITE ORDERED BY col1, col2 DESC;
ALTER TABLE `/path/to/table` WRITE DISTRIBUTED BY PARTITION LOCALLY ORDERED BY col1;
ALTER TABLE `/path/to/table` WRITE UNORDERED;
Identifier fields¶
ALTER TABLE table_identifier SET IDENTIFIER FIELDS col1 [, ...]
ALTER TABLE table_identifier DROP IDENTIFIER FIELDS col1 [, ...]
-- examples
ALTER TABLE `/path/to/table` SET IDENTIFIER FIELDS col1, col2;
ALTER TABLE `/path/to/table` DROP IDENTIFIER FIELDS col1, col2;
Table properties¶
ALTER TABLE table_identifier SET TBLPROPERTIES ('key1'='value1' [, ...])
ALTER TABLE table_identifier UNSET TBLPROPERTIES ('key1' [, ...])
-- examples
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet');
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet', 'write.parquet.compression-codec'='zstd');
ALTER TABLE `/path/to/table` UNSET TBLPROPERTIES ('write.format.default');
Tags¶
ALTER TABLE table_identifier CREATE TAG [IF NOT EXISTS] tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier CREATE OR REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier DROP TAG [IF EXISTS] tag_name
ALTER TABLE table_identifier REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- examples
ALTER TABLE `/path/to/table` CREATE TAG v1 AS OF VERSION 42;
ALTER TABLE `/path/to/table` CREATE TAG IF NOT EXISTS v1 AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE TAG v2 AS OF VERSION 100;
ALTER TABLE `/path/to/table` DROP TAG IF EXISTS v1;
ALTER TABLE `/path/to/table` REPLACE TAG v1 AS OF VERSION 50 RETAIN 14 DAYS;
Branches¶
ALTER TABLE table_identifier CREATE BRANCH [IF NOT EXISTS] branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier CREATE OR REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier DROP BRANCH [IF EXISTS] branch_name
ALTER TABLE table_identifier REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- examples
ALTER TABLE `/path/to/table` CREATE BRANCH feature_branch;
ALTER TABLE `/path/to/table` CREATE BRANCH IF NOT EXISTS my_branch AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE BRANCH my_branch AS OF VERSION 10;
ALTER TABLE `/path/to/table` DROP BRANCH IF EXISTS old_branch;
ALTER TABLE `/path/to/table` REPLACE BRANCH my_branch AS OF VERSION 5 RETAIN 30 DAYS;
Transaction blocks¶
Transaction blocks group multiple statements to execute as a single atomic job.
BEGIN;
statement1;
statement2;
...
COMMIT;
Rules:
- All statements within
BEGIN/COMMITexecute together in a single job. - Transaction blocks cannot be nested.
- A transaction block must contain at least one write statement.
BEGINwithoutCOMMITorCOMMITwithoutBEGINis a parse error.
-- Create and populate a table atomically
BEGIN;
CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
COMMIT;
-- Multiple inserts into separate tables as a single job
BEGIN;
INSERT INTO `/path/to/table1` SELECT * FROM `/path/to/source`;
INSERT INTO `/path/to/table2` SELECT * FROM `/path/to/source`;
COMMIT;
Query composition¶
[ WITH with_query [ , ... ] ]
select_statement
[ { UNION | INTERSECT | EXCEPT } select_statement, ... ]
[ ORDER BY { expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] } ]
[ WINDOW { window_name AS ( window_spec ) [, ...] } ]
[ LIMIT { ALL | expression } ]
WITH clause (common table expressions)¶
Where with_query is:
cte_name [ ( column_name [, ...] ) ] AS ( select_statement )
-- examples
WITH sales_summary (cust_id, total_sales, order_count) AS (
SELECT customer_id, SUM(total_amount), COUNT(*)
FROM `/data/orders`
GROUP BY customer_id
)
SELECT * FROM sales_summary WHERE total_sales > 10000;
SELECT statement¶
Where select_statement is:
SELECT [ ALL | DISTINCT ] { [ named_expression | regex_column_names | * ] [, ...] }
FROM { from_item [, ...] }
[ WHERE boolean_expression ]
[ GROUP BY { expression | ROLLUP(...) | CUBE(...) | GROUPING SETS(...) } [ , ... ] ]
[ HAVING boolean_expression ]
[ LIMIT { ALL | expression } [ OFFSET expression ] ]
Named expressions¶
Where named_expression is one of:
Expression with optional alias:
expression [ AS alias ]
-- examples
SELECT count(*) AS total_count FROM `/path/to/table`;
SELECT name, salary * 1.1 AS adjusted_salary FROM `/path/to/employees`;
Window function:
window_function() OVER ( window_spec )
window_function() OVER window_name
Where window_spec is:
[ PARTITION BY expression [, ...] ]
[ ORDER BY expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] ]
-- examples
SELECT name, ROW_NUMBER() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;
SELECT category, product_id,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as category_rank
FROM `/path/to/products`;
-- Using named window
SELECT customer_id, order_date, total,
ROW_NUMBER() OVER w as order_sequence
FROM `/path/to/orders`
WINDOW w AS (PARTITION BY customer_id ORDER BY order_date);
FROM clause¶
Where from_item is one of:
Table reference¶
Tables can be referenced by filesystem path or resource identifier (RID). Both support optional branch specifications for querying specific Iceberg branches. Branch names that contain special characters need to be escaped with backticks.
{ `/path/to/table`[.branch_<branch name>] | `ri.rid.for.table.1234`[.branch_<branch name>] }
-- examples
SELECT * FROM `/path/to/table`;
SELECT * FROM `/path/to/table`.branch_master;
SELECT * FROM `/path/to/table` WHERE id = 1;
SELECT * FROM `ri.rid.for.table.1234`;
SELECT * FROM `ri.rid.for.table`.`branch_abc\xyz`;
Time travel queries to a specific version are possible with the VERSION AS OF syntax using a snapshot ID for Iceberg tables and a transaction RID for Foundry datasets.
-- Examples
SELECT * FROM `/path/to/table` VERSION AS OF `ri.foundry.main.transaction.1234`;
SELECT * FROM `/path/to/table` VERSION AS OF 1234567;
For Foundry datasets, you can read the rows added within a range of transactions with VERSION BETWEEN ... AND .... The query returns rows added strictly after the start transaction up through the end transaction. The start bound is exclusive and the end bound is inclusive. Both bounds must be transaction RIDs, not snapshot IDs.
-- Rows appended after txn.1234, up to and including txn.5678
SELECT * FROM `/path/to/table`
VERSION BETWEEN `ri.foundry.main.transaction.1234` AND `ri.foundry.main.transaction.5678`;
VERSION BETWEEN is not supported on Iceberg tables. Other time travel syntax is not currently supported.
Join¶
relation
{ CROSS | LEFT [ OUTER ] | RIGHT [ OUTER ] | FULL [ OUTER ] }
JOIN relation
ON boolean_expression
-- examples
SELECT * FROM `/path/to/table` LEFT JOIN `/path/to/table2` ON id = id2;
SELECT * FROM `/path/to/table` RIGHT OUTER JOIN `/path/to/table2` ON id = id2;
Subquery¶
( select_statement )
-- examples
SELECT * FROM (SELECT * FROM `/path/to/table`);
SELECT * FROM (SELECT * FROM `/path/to/table`) WHERE id = 1;
SELECT * FROM (
VALUES (9001, 1001, 1, DATE '2023-06-01', 'credit_card', 1329.98, 'completed',
TIMESTAMP '2023-06-01 11:00:00')
) AS orders(order_id, customer_id, product_id, order_date, payment_method, total, status, created_at);
:::callout{theme="neutral"} Subqueries must be wrapped in parentheses. This is enforced in Foundry even though it is not always required in Spark SQL. :::
Complete query examples¶
-- Complex query with joins and filtering
SELECT DISTINCT o.order_id,
c.name,
o.total_amount,
p.category
FROM `/data/orders` o
LEFT JOIN `/data/customers` c ON o.customer_id = c.id
CROSS JOIN (SELECT product_id, category
FROM `/data/products`
WHERE category = 'Electronics') p ON o.product_id = p.product_id
WHERE o.order_date > DATE '2024-01-01'
AND o.total_amount > 1000.0
ORDER BY o.total_amount DESC
LIMIT 100;
-- Aggregation with HAVING clause
SELECT c.name,
COUNT(o.order_id) as order_count,
SUM(o.total_amount) as total_spent
FROM `/data/customers` c
LEFT JOIN `/data/orders` o ON c.id = o.customer_id
GROUP BY c.name
HAVING COUNT(o.order_id) > 0;
-- UNION query with multiple joins
SELECT emp_id, name, dept, 'Active' AS status
FROM `/hr/employees` e
LEFT OUTER JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = true
UNION
SELECT emp_id, name, dept, 'Inactive' AS status
FROM (SELECT *
FROM `/hr/employees`
WHERE termination_date < current_timestamp()) e
RIGHT JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = false
ORDER BY dept ASC, status DESC NULLS LAST;
SQL hints¶
SQL hints provide additional directives to the query engine. Hints are specified using the following syntax:
/*+ hint1, hint2(opts...) */
The following hints are currently supported:
adhoc- Run a write query as an adhoc query. The new query is added as a separate target alongside any existing targets on the table, preserving the existing default target. A direct rebuild of the table will still use the original default target, not the adhoc one.
/*+ adhoc */
CREATE OR REPLACE TABLE `/path/to/table` AS SELECT * FROM `/path/to/source`;
Iceberg references¶
Foundry SQL supports Iceberg references to query metadata. Review the official documentation ↗.
-- examples
SELECT * FROM `/path/to/table/`.files;
SELECT * FROM `/path/to/table/`.snapshots;
Introspection¶
Use SHOW FUNCTIONS to list the functions supported by Foundry SQL. The statement returns three columns: name, kind (one of scalar, aggregate, or window), and signature. An optional LIKE pattern filters by function name, where % matches any sequence of characters and _ matches a single character.
SHOW FUNCTIONS;
SHOW FUNCTIONS LIKE 'abs%';
Functions¶
Functions are available to transforms data in more complex ways. Refer to the Spark SQL documentation ↗ for detailed information on each function below.
Array functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
array(expr, ...) |
Create an array with given elements | SELECT array(1, 2, 3) |
Supported | Supported |
array_distinct(array) |
Remove duplicate values from array | SELECT array_distinct(tags) FROM table |
Supported | Not supported |
array_intersect(array1, array2) |
Elements in both arrays without duplicates | SELECT array_intersect(arr1, arr2) FROM table |
Supported | Not supported |
array_union(array1, array2) |
Elements in either array without duplicates | SELECT array_union(arr1, arr2) FROM table |
Supported | Not supported |
array_except(array1, array2) |
Elements in array1 but not in array2 | SELECT array_except(arr1, arr2) FROM table |
Supported | Not supported |
array_join(array, delimiter [, nullReplacement]) |
Concatenate array elements with delimiter | SELECT array_join(tags, ',') FROM table |
Supported | Not supported |
array_max(array) |
Maximum value in array | SELECT array_max(ARRAY(1, 5, 3)) |
Supported | Not supported |
array_min(array) |
Minimum value in array | SELECT array_min(ARRAY(1, 5, 3)) |
Supported | Not supported |
array_position(array, element) |
Position of first occurrence (1-based, 0 if not found) | SELECT array_position(tags, 'item') FROM table |
Supported | Not supported |
array_remove(array, element) |
Remove all matching elements | SELECT array_remove(tags, 'old') FROM table |
Supported | Not supported |
array_size(array) |
Number of elements in array | SELECT array_size(tags) FROM table |
Supported | Not supported |
array_contains(array, value) |
True if array contains value | SELECT array_contains(tags, 'new') FROM table |
Supported | Not supported |
array_repeat(element, count) |
Create array with element repeated count times | SELECT array_repeat('x', 5) |
Supported | Not supported |
arrays_overlap(array1, array2) |
True if arrays have common non-null elements | SELECT arrays_overlap(arr1, arr2) FROM table |
Supported | Not supported |
flatten(arrayOfArrays) |
Flatten nested arrays into single array | SELECT flatten(nested_arrays) FROM table |
Supported | Not supported |
sequence(start, stop, step) |
Generate sequence of integers | SELECT sequence(1, 10, 2) |
Supported | Not supported |
shuffle(array) |
Random permutation of array | SELECT shuffle(tags) FROM table |
Supported | Not supported |
slice(array, start, length) |
Subset of array from start with length | SELECT slice(tags, 1, 2) FROM table |
Supported | Not supported |
sort_array(array[, ascendingOrder]) |
Sort array in ascending (default) or descending order | SELECT sort_array(tags) FROM table |
Supported | Not supported |
array_sort(array) |
Sort array in ascending order with nulls last | SELECT array_sort(tags) FROM table |
Supported | Not supported |
get(array, index) |
Returns element at given index (0-based) | SELECT get(tags, 0) FROM table |
Supported | Not supported |
element_at(array, index) |
Returns element at given index (1-based; negative counts from the end) | SELECT element_at(tags, 1) FROM table |
Supported | Not supported |
size(array) |
Number of elements in array (also accepts a map) | SELECT size(tags) FROM table |
Supported | Not supported |
cardinality(array) |
Number of elements in array (also accepts a map) | SELECT cardinality(tags) FROM table |
Supported | Not supported |
explode(array) |
Generates a new row for each element in the array | SELECT explode(tags) FROM table |
Supported | Not supported |
Map functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
map(key1, value1, key2, value2, ...) |
Create a map from key-value pairs | SELECT map('a', 1, 'b', 2) |
Supported | Not supported |
map_keys(map) |
Returns an array of the map's keys | SELECT map_keys(attributes) FROM table |
Supported | Not supported |
map_values(map) |
Returns an array of the map's values | SELECT map_values(attributes) FROM table |
Supported | Not supported |
element_at(map, key) |
Returns the value for the given key | SELECT element_at(attributes, 'color') FROM table |
Supported | Not supported |
size(map) |
Number of entries in map (also accepts an array) | SELECT size(attributes) FROM table |
Supported | Not supported |
cardinality(map) |
Number of entries in map (also accepts an array) | SELECT cardinality(attributes) FROM table |
Supported | Not supported |
Struct functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
struct(expr1, expr2, ...) |
Create a struct from expressions | SELECT struct('red', 'large', 2.0) |
Supported | Not supported |
Date and timestamp functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
current_timestamp() |
Returns current timestamp | SELECT current_timestamp() |
Supported | Supported |
current_date() |
Returns current date | SELECT current_date() |
Supported | Supported |
now() |
Returns current timestamp (alias for current_timestamp) |
SELECT now() |
Supported | Not supported |
add_months(date, num_months) |
Add months to date | add_months(order_date, 3) |
Supported | Not supported |
date_add(expr, num_days) |
Add days to date | date_add(order_date, 7) |
Supported | Supported |
date_format(expr, format) |
Format date or timestamp as a string | SELECT date_format(TIMESTAMP '2024-01-15 10:30:00', 'yyyy-MM-dd') |
Supported | Not supported |
date_sub(expr, num_days) |
Subtract days from date | date_sub(order_date, 7) |
Supported | Supported |
from_unixtime(expr [, format]) |
Convert Unix timestamp to timestamp | from_unixtime(1609459200) |
Supported | Not supported |
unix_timestamp([expr] [, format]) |
Convert timestamp to Unix timestamp | unix_timestamp(current_timestamp()) |
Supported | Not supported |
day(date) |
Extract day of month from date | SELECT day(order_date) FROM table |
Supported | Supported |
hour(timestamp) |
Extract hour from timestamp | SELECT hour(order_timestamp) FROM table |
Supported | Not supported |
minute(timestamp) |
Extract minute from timestamp | SELECT minute(order_timestamp) FROM table |
Supported | Not supported |
month(date) |
Extract month from date | SELECT month(order_date) FROM table |
Supported | Supported |
quarter(date) |
Extract quarter from date | SELECT quarter(order_date) FROM table |
Supported | Supported |
second(timestamp) |
Extract second from timestamp | SELECT second(order_timestamp) FROM table |
Supported | Not supported |
year(date) |
Extract year from date | SELECT year(order_date) FROM table |
Supported | Supported |
date_trunc(unit, timestamp) |
Truncate timestamp to specified unit | SELECT date_trunc('day', current_timestamp()) |
Supported | Not supported |
trunc(date, fmt) |
Truncate date to YEAR, QUARTER, MONTH, or WEEK granularity |
SELECT trunc(order_date, 'MM') |
Supported | Not supported |
to_date(str [, format]) |
Convert string to date | SELECT to_date('2024-01-15') |
Supported | Not supported |
to_timestamp(expr [, format]) |
Convert to timestamp | SELECT to_timestamp('2024-01-15 10:30:00') |
Supported | Not supported |
timestamp(expr) |
Short-form cast to timestamp | SELECT timestamp('2024-01-15 10:30:00') |
Supported | Not supported |
from_utc_timestamp(ts, tz) |
Interpret a UTC timestamp in the given timezone | SELECT from_utc_timestamp(ts, 'America/Los_Angeles') |
Supported | Not supported |
to_utc_timestamp(ts, tz) |
Convert a timestamp in the given timezone to UTC | SELECT to_utc_timestamp(ts, 'America/Los_Angeles') |
Supported | Not supported |
dayofmonth(date) |
Extract day of month from date (alias for day) |
SELECT dayofmonth(order_date) FROM table |
Supported | Not supported |
dayofweek(date) |
Day of week (1 = Sunday, 7 = Saturday) | SELECT dayofweek(order_date) FROM table |
Supported | Not supported |
weekday(date) |
Day of week (0 = Monday, 6 = Sunday) | SELECT weekday(order_date) FROM table |
Supported | Not supported |
dayofyear(date) |
Day of year (1 to 366) | SELECT dayofyear(order_date) FROM table |
Supported | Not supported |
weekofyear(date) |
ISO week of year | SELECT weekofyear(order_date) FROM table |
Supported | Not supported |
last_day(date) |
Last day of the month containing date | SELECT last_day(order_date) FROM table |
Supported | Not supported |
next_day(start_date, day_of_week) |
First date after start_date that falls on the named day |
SELECT next_day(order_date, 'Mon') FROM table |
Supported | Not supported |
date_diff(date1, date2) |
Subtract two dates | SELECT date_diff('2020-01-01', '2020-01-02') |
Supported | Supported |
months_between(ts1, ts2 [, roundOff]) |
Fractional months between two timestamps (rounds to eight digits unless roundOff is false) |
SELECT months_between('1997-02-28 10:30:00', '1996-10-30') |
Supported | Not supported |
timestamp_seconds(seconds) |
Timestamp from seconds since the UTC epoch (accepts fractional values) | SELECT timestamp_seconds(1230219000) |
Supported | Not supported |
timestamp_millis(millis) |
Timestamp from milliseconds since the UTC epoch | SELECT timestamp_millis(1230219000123) |
Supported | Not supported |
timestamp_micros(micros) |
Timestamp from microseconds since the UTC epoch | SELECT timestamp_micros(1230219000123123) |
Supported | Not supported |
unix_seconds(timestamp) |
Seconds since 1970-01-01 UTC (truncates higher precision) | SELECT unix_seconds(current_timestamp()) |
Supported | Not supported |
unix_millis(timestamp) |
Milliseconds since 1970-01-01 UTC (truncates higher precision) | SELECT unix_millis(current_timestamp()) |
Supported | Not supported |
unix_micros(timestamp) |
Microseconds since 1970-01-01 UTC | SELECT unix_micros(current_timestamp()) |
Supported | Not supported |
Mathematical functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
abs(x) |
Absolute value | SELECT abs(-5) |
Supported | Supported |
acos(x) |
Arc cosine | SELECT acos(0.5) |
Supported | Not supported |
asin(x) |
Arc sine | SELECT asin(0.5) |
Supported | Not supported |
atan(x) |
Arc tangent | SELECT atan(1) |
Supported | Not supported |
atan2(y, x) |
Arc tangent of y/x | SELECT atan2(1, 1) |
Supported | Not supported |
cbrt(x) |
Cube root | SELECT cbrt(27) |
Supported | Not supported |
ceil(x) |
Round up to nearest integer | SELECT ceil(3.7) |
Supported | Supported |
ceiling(x) |
Round up to nearest integer | SELECT ceiling(3.7) |
Supported | Supported |
cos(x) |
Cosine | SELECT cos(pi()) |
Supported | Not supported |
cosh(x) |
Hyperbolic cosine | SELECT cosh(0) |
Supported | Not supported |
degrees(x) |
Convert radians to degrees | SELECT degrees(pi()) |
Supported | Not supported |
e() |
Euler's number | SELECT e() |
Supported | Not supported |
exp(x) |
e raised to power x | SELECT exp(1) |
Supported | Not supported |
floor(x) |
Round down to nearest integer | SELECT floor(3.7) |
Supported | Supported |
isnan(x) |
True if x is NaN (NULL input returns false) | SELECT isnan(price) FROM table |
Supported | Not supported |
nanvl(x, y) |
Returns x unless it is NaN, in which case returns y | SELECT nanvl(price, 0) FROM table |
Supported | Not supported |
ln(x) |
Natural logarithm | SELECT ln(e()) |
Supported | Not supported |
log(base, x) |
Logarithm with specified base | SELECT log(2, 8) |
Supported | Not supported |
log10(x) |
Base 10 logarithm | SELECT log10(100) |
Supported | Not supported |
log2(x) |
Base 2 logarithm | SELECT log2(8) |
Supported | Not supported |
mod(n, m) |
Modulus (remainder) | SELECT mod(10, 3) |
Supported | Supported |
pi() |
Pi constant | SELECT pi() |
Supported | Not supported |
pow(x, p) |
x raised to power p | SELECT pow(2, 3) |
Supported | Supported |
power(x, p) |
x raised to power p | SELECT power(2, 3) |
Supported | Supported |
radians(x) |
Convert degrees to radians | SELECT radians(180) |
Supported | Not supported |
rand([seed]) |
Random value between 0 and 1. Optional seed is honored on Spark but ignored on Trino. | SELECT rand() |
Supported | Not supported |
random([seed]) |
Random value between 0 and 1. Optional seed is honored on Spark but ignored on Trino. | SELECT random() |
Supported | Not supported |
round(x, d) |
Round to d decimal places | SELECT round(3.7, 1) |
Supported | Supported |
sign(x) |
Sign function (-1, 0, 1) | SELECT sign(-5) |
Supported | Not supported |
sin(x) |
Sine | SELECT sin(pi() / 2) |
Supported | Not supported |
sinh(x) |
Hyperbolic sine | SELECT sinh(0) |
Supported | Not supported |
sqrt(x) |
Square root | SELECT sqrt(16) |
Supported | Not supported |
tan(x) |
Tangent | SELECT tan(pi() / 4) |
Supported | Not supported |
tanh(x) |
Hyperbolic tangent | SELECT tanh(0) |
Supported | Not supported |
bin(x) |
Binary (base-2) string representation of a long value (negatives use two's-complement 64-bit) | SELECT bin(13) |
Supported | Not supported |
String functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
chr(n) |
Returns character from Unicode code point | SELECT chr(65) |
Supported | Not supported |
char(n) |
Returns character from code point (alias for chr) | SELECT char(65) |
Supported | Not supported |
contains(str, substr) |
True if string contains substring | SELECT contains('hello', 'ell') |
Supported | Not supported |
concat_ws(sep, str1, ...) |
Concatenate strings with separator | SELECT concat_ws(',', 'a', 'b') |
Supported | Not supported |
concat(str1, ...) |
Concatenate strings | SELECT concat('a', 'b') |
Supported | Supported |
length(str) |
Length of string in characters | SELECT length('hello') |
Supported | Supported |
len(str) |
Length of string (alias for length); accepts string or binary | SELECT len('hello') |
Supported | Not supported |
char_length(str) |
Length of string (alias for length) | SELECT char_length('hello') |
Supported | Supported |
character_length(str) |
Length of string (alias for length) | SELECT character_length('hello') |
Supported | Supported |
bit_length(expr) |
Bit length of string or binary data (UTF-8 byte length multiplied by 8 for strings) | SELECT bit_length('Spark SQL') |
Supported | Not supported |
octet_length(expr) |
Byte length of string (UTF-8) or binary | SELECT octet_length('Spark SQL') |
Supported | Not supported |
format_number(num, places_or_format) |
Format a number like '#,###.##' rounded to N places, or with a user-supplied decimal-format pattern |
SELECT format_number(12332.123456, 4) |
Supported | Not supported |
lower(str) |
Convert to lowercase | SELECT lower('HELLO') |
Supported | Supported |
lcase(str) |
Convert to lowercase (alias for lower) | SELECT lcase('HELLO') |
Supported | Not supported |
upper(str) |
Convert to uppercase | SELECT upper('hello') |
Supported | Supported |
ucase(str) |
Convert to uppercase (alias for upper) | SELECT ucase('hello') |
Supported | Not supported |
initcap(str) |
Capitalize the first letter of each word | SELECT initcap('hello world') |
Supported | Not supported |
lpad(str, len, pad) |
Left pad string to length | SELECT lpad('hi', 5, 'x') |
Supported | Not supported |
rpad(str, len, pad) |
Right pad string to length | SELECT rpad('hi', 5, 'x') |
Supported | Not supported |
ltrim(str) |
Remove leading whitespace | SELECT ltrim(' hello') |
Supported | Not supported |
rtrim(str) |
Remove trailing whitespace | SELECT rtrim('hello ') |
Supported | Not supported |
trim(str) |
Remove leading and trailing whitespace | SELECT trim(' hello ') |
Supported | Supported |
replace(str, search, replace) |
Replace all occurrences | SELECT replace('hello', 'l', 'x') |
Supported | Supported |
reverse(str) |
Reverse a string (also works on arrays) | SELECT reverse('hello') |
Supported | Not supported |
right(str, len) |
Rightmost len characters of string |
SELECT right('hello', 3) |
Supported | Not supported |
substr(str, pos, len) |
Extract substring | SELECT substr('hello', 2, 3) |
Supported | Supported |
substring(str, pos, len) |
Extract substring | SELECT substring('hello', 2, 3) |
Supported | Supported |
substring_index(str, delim, count) |
Substring before count occurrences of the delimiter (negative counts from the end) |
SELECT substring_index('a.b.c.d', '.', 2) |
Supported | Not supported |
split(str, delimiter) |
Split string into array | SELECT split('a,b,c', ',') |
Supported | Not supported |
split_part(str, delimiter, index) |
Get part from split string (1-based) | SELECT split_part('a,b,c', ',', 2) |
Supported | Not supported |
instr(str, substr) |
Find substring position (1-based, 0 if not found) | SELECT instr('hello', 'll') |
Supported | Not supported |
locate(substr, str [, pos]) |
Find substring position starting at pos (1-based, 0 if not found) |
SELECT locate('ll', 'hello') |
Supported | Not supported |
position(substr IN str) |
Find substring position (SQL standard syntax) | SELECT position('ll' IN 'hello') |
Supported | Not supported |
startswith(str, prefix) |
Check if string starts with prefix | SELECT startswith('hello', 'he') |
Supported | Not supported |
luhn_check(str) |
Validate string using Luhn algorithm | SELECT luhn_check('79927398713') |
Supported | Not supported |
Conditional functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
coalesce(expr1, expr2, ...) |
Returns first non-null expression | SELECT coalesce(col, 'default') |
Supported | Supported |
nullif(expr1, expr2) |
Returns null if expr1 equals expr2, otherwise returns expr1 |
SELECT 100.0 / nullif(count, 0) |
Supported | Not supported |
if(cond, then, else) |
Returns then if cond is true, otherwise else |
SELECT if(price > 100, 'high', 'low') |
Supported | Not supported |
nvl(expr1, expr2) |
Returns expr2 if expr1 is null, otherwise expr1 |
SELECT nvl(col, 'default') |
Supported | Not supported |
ifnull(expr1, expr2) |
Alias for nvl |
SELECT ifnull(col, 'default') |
Supported | Not supported |
nvl2(expr1, expr2, expr3) |
Returns expr2 if expr1 is not null, otherwise expr3 |
SELECT nvl2(col, 'present', 'missing') |
Supported | Not supported |
greatest(expr1, expr2, ...) |
Largest non-null value (null only when all inputs are null) | SELECT greatest(a, b, c) FROM table |
Supported | Not supported |
least(expr1, expr2, ...) |
Smallest non-null value (null only when all inputs are null) | SELECT least(a, b, c) FROM table |
Supported | Not supported |
regexp_like(str, pattern) |
Test if string matches regex pattern | SELECT regexp_like('hello123', '[0-9]+') |
Supported | Not supported |
regexp_extract(str, pattern) |
Extract first substring matching pattern | SELECT regexp_extract('hello123', '[0-9]+') |
Supported | Supported |
regexp_extract_all(str, pattern) |
Extract all substrings matching pattern | SELECT regexp_extract_all('a1b2c3', '[0-9]+') |
Supported | Not supported |
regexp_replace(str, pattern, replacement) |
Replace all matches with replacement string | SELECT regexp_replace('hello123', '[0-9]+', 'X') |
Supported | Supported |
regexp_count(str, pattern) |
Count occurrences of pattern in string | SELECT regexp_count('a1b2c3', '[0-9]+') |
Supported | Not supported |
rlike(str, regexp) |
True if string matches the Java regex (partial match) | SELECT rlike('hello123', '\\d+') |
Supported | Not supported |
regexp(str, regexp) |
Alias for rlike |
SELECT regexp('hello123', '\\d+') |
Supported | Not supported |
Hashing functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
md5(expr) |
MD5 128-bit checksum as a lowercase hex string | SELECT md5('Spark') |
Supported | Not supported |
sha(expr) |
SHA-1 hash as a hex string (alias for sha1) | SELECT sha('Spark') |
Supported | Not supported |
sha1(expr) |
SHA-1 hash as a hex string | SELECT sha1('Spark') |
Supported | Not supported |
sha2(expr, bitLength) |
SHA-2 hash (224, 256, 384, or 512; 0 means 256). Invalid bit length returns null | SELECT sha2('Spark', 256) |
Supported | Not supported |
crc32(expr) |
CRC32 checksum as a bigint | SELECT crc32('Spark') |
Supported | Not supported |
Conversion functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
cast(expr AS type) |
Convert value to specified type | cast(count() AS float) |
Supported | Supported |
try_cast(expr AS type) |
Convert value to specified type, returns NULL on failure | try_cast('abc' AS int) |
Supported | Not supported |
Aggregate functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
count() |
Count all rows | SELECT count() FROM table |
Supported | Supported |
count(DISTINCT col) |
Count distinct values in column | SELECT count(DISTINCT id) FROM table |
Supported | Supported if single property |
approx_count_distinct(col) |
Approximate count of distinct values (HyperLogLog) | SELECT approx_count_distinct(id) FROM table |
Supported | Not supported |
sum(col) |
Sum values in column | SELECT sum(cost) FROM table |
Supported | Supported |
avg(col) |
Mean average values in column | SELECT avg(weight) FROM table |
Supported | Supported |
mean(col) |
Mean average values in column (alias for avg) | SELECT mean(weight) FROM table |
Supported | Not supported |
min(col) |
Minimum value in column | SELECT min(price) FROM table |
Supported | Supported |
max(col) |
Maximum value in column | SELECT max(price) FROM table |
Supported | Supported |
min_by(x, y) |
Value of x associated with minimum value of y | SELECT min_by(name, age) FROM table |
Supported | Not supported |
max_by(x, y) |
Value of x associated with maximum value of y | SELECT max_by(name, age) FROM table |
Supported | Not supported |
first(col[, ignoreNulls]) |
First value in a group (always nullable) | SELECT first(name) FROM table GROUP BY category |
Supported | Not supported |
last(col[, ignoreNulls]) |
Last value in a group (always nullable) | SELECT last(name) FROM table GROUP BY category |
Supported | Not supported |
count_if(condition) |
Count rows where condition is true | SELECT count_if(age > 18) FROM table |
Supported | Not supported |
bool_and(condition) |
True if all values are true | SELECT bool_and(active) FROM table |
Supported | Not supported |
bool_or(condition) |
True if any value is true | SELECT bool_or(active) FROM table |
Supported | Not supported |
bit_and(col) |
Bitwise AND of all non-null integer values | SELECT bit_and(flags) FROM table |
Supported | Not supported |
bit_or(col) |
Bitwise OR of all non-null integer values | SELECT bit_or(flags) FROM table |
Supported | Not supported |
bit_xor(col) |
Bitwise XOR of all non-null integer values | SELECT bit_xor(flags) FROM table |
Supported | Not supported |
every(condition) |
True if all values are true (alias for bool_and) | SELECT every(active) FROM table |
Supported | Not supported |
stddev(col) |
Sample standard deviation | SELECT stddev(salary) FROM table |
Supported | Supported |
stddev_pop(col) |
Population standard deviation | SELECT stddev_pop(salary) FROM table |
Supported | Supported |
stddev_samp(col) |
Sample standard deviation (alias for stddev) | SELECT stddev_samp(salary) FROM table |
Supported | Supported |
variance(col) |
Sample variance | SELECT variance(salary) FROM table |
Supported | Not supported |
var_pop(col) |
Population variance | SELECT var_pop(salary) FROM table |
Supported | Not supported |
var_samp(col) |
Sample variance | SELECT var_samp(salary) FROM table |
Supported | Not supported |
corr(y, x) |
Pearson correlation coefficient | SELECT corr(sales, ads) FROM table |
Supported | Not supported |
covar_pop(y, x) |
Population covariance | SELECT covar_pop(y, x) FROM table |
Supported | Not supported |
covar_samp(y, x) |
Sample covariance | SELECT covar_samp(y, x) FROM table |
Supported | Not supported |
skewness(col) |
Skewness | SELECT skewness(values) FROM table |
Supported | Not supported |
regr_intercept(y, x) |
Linear regression intercept | SELECT regr_intercept(y, x) FROM table |
Supported | Not supported |
regr_slope(y, x) |
Linear regression slope | SELECT regr_slope(y, x) FROM table |
Supported | Not supported |
grouping(col) |
Returns 1 if column is aggregated, 0 otherwise (for use with ROLLUP/CUBE/GROUPING SETS) | SELECT grouping(category) FROM table GROUP BY ROLLUP(category) |
Supported | Not supported |
collect_set(col) |
Returns an array of unique values from column | SELECT collect_set(name) FROM table GROUP BY category |
Supported | Not supported |
collect_list(col) |
Returns an array of all values from column (preserves duplicates) | SELECT collect_list(name) FROM table GROUP BY category |
Supported | Not supported |
percentile(col, p[, freq]) |
Exact percentile of column for percentage p (scalar or array) |
SELECT percentile(salary, 0.5) FROM table |
Supported | Not supported |
percentile_approx(col, p[, accuracy]) |
Approximate percentile; preserves input type (numeric, date, timestamp, interval) | SELECT percentile_approx(salary, ARRAY(0.25, 0.5, 0.75)) FROM table |
Supported | Not supported |
approx_percentile(col, p[, accuracy]) |
Alias for percentile_approx |
SELECT approx_percentile(salary, 0.5) FROM table |
Supported | Not supported |
Window functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
ROW_NUMBER() |
Assigns sequential row number within partition | ROW_NUMBER() OVER (ORDER BY col) |
Supported | Supported |
RANK() |
Rank with gaps for ties | RANK() OVER (ORDER BY salary DESC) |
Supported | Supported |
DENSE_RANK() |
Rank without gaps for ties | DENSE_RANK() OVER (ORDER BY salary DESC) |
Supported | Supported |
PERCENT_RANK() |
Percentage ranking of value in group | PERCENT_RANK() OVER (ORDER BY salary) |
Supported | Not supported |
CUME_DIST() |
Cumulative distribution of value | CUME_DIST() OVER (ORDER BY salary) |
Supported | Not supported |
NTILE(n) |
Divide rows into n buckets | NTILE(4) OVER (ORDER BY salary) |
Supported | Not supported |
LAG(expr[, offset[, default]]) |
Value from previous row | LAG(salary, 1) OVER (ORDER BY date) |
Supported | Supported |
LEAD(expr[, offset[, default]]) |
Value from next row | LEAD(salary, 1) OVER (ORDER BY date) |
Supported | Supported |
FIRST_VALUE(expr) |
First value in window frame | FIRST_VALUE(salary) OVER (ORDER BY date) |
Supported | Supported |
LAST_VALUE(expr) |
Last value in window frame | LAST_VALUE(salary) OVER (ORDER BY date) |
Supported | Supported |
NTH_VALUE(expr, n) |
Nth value in window frame | NTH_VALUE(salary, 2) OVER (ORDER BY date) |
Supported | Supported |
| Aggregate functions | Standard aggregates (count, sum, avg, min, max, stddev, variance, etc.) can be used as window functions | SUM(amount) OVER (PARTITION BY customer_id) |
Supported | Only count, sum, min max, avg |
Window functions require an OVER clause with optional PARTITION BY and ORDER BY:
-- examples
SELECT id, name, ROW_NUMBER() OVER (ORDER BY name) as row_num
FROM `/path/to/table`;
SELECT category, name, price,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as rank_in_category
FROM `/path/to/products`;
SELECT order_id, customer_id, order_date, total_amount,
SUM(total_amount) OVER (PARTITION BY customer_id ORDER BY order_date) as running_total,
AVG(total_amount) OVER (PARTITION BY customer_id) as customer_avg,
COUNT(*) OVER (PARTITION BY customer_id) as customer_order_count
FROM `/path/to/orders`;
SELECT id, amount,
ROW_NUMBER() OVER w as sequence_num
FROM `/path/to/transactions`
WINDOW w AS (PARTITION BY customer_id ORDER BY transaction_date);
JSON functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
get_json_object(json_str, path) |
Extract a value from a JSON string using a JSONPath expression | SELECT get_json_object('{"a":1}', '$.a') |
Supported | Not supported |
json_array_length(json_str) |
Returns the number of elements in a JSON array | SELECT json_array_length('[1,2,3]') |
Supported | Not supported |
json_object_keys(json_str) |
Returns the keys of a JSON object as an array | SELECT json_object_keys('{"a":1,"b":2}') |
Supported | Not supported |
to_json(expr) |
Convert a struct or map to a JSON string | SELECT to_json(map('a', 1)) |
Supported | Not supported |
Misc functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
zorder() |
Iceberg Z-order optimization (Iceberg only) | CALL zorder('table', 'col1,col2') |
Supported | Not supported |
typeof(expr) |
Returns the runtime type name of the expression | SELECT typeof(price) FROM table |
Supported | Not supported |
Geospatial functions¶
| Function | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
st_point(x, y) |
Creates a point geometry from X, Y coordinates | SELECT st_point(-122.4, 37.8) |
Supported | Not supported |
st_contains(geom1, geom2) |
Returns true if geom1 contains geom2 | WHERE st_contains(boundary, location) |
Supported | Not supported |
st_equals(geom1, geom2) |
Returns true if geometries are spatially equal | SELECT st_equals(point1, point2) |
Supported | Not supported |
st_geomfromgeojson(json) |
Creates geometry from GeoJSON string | SELECT st_geomfromgeojson('{"type":"Point","coordinates":[0,0]}') |
Supported | Not supported |
st_astext(geom) |
Returns geometry as Well-Known Text (WKT) string | SELECT st_astext(st_point(0, 0)) |
Supported | Not supported |
st_h3cellids(geom, level, fullCover) |
Returns an array of H3 cells that cover the geometry | SELECT st_h3cellids(st_point(0, 0), 8, true) |
Supported | Not supported |
:::callout{theme="neutral"}
Geometry columns cannot be returned directly. Use st_astext() to convert to WKT format.
:::
Arithmetic operators¶
| Operator | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
+ |
Plus | SELECT count() + 5 |
Supported | Supported |
- |
Minus | SELECT count() - 1 |
Supported | Supported |
* |
Times | SELECT sum(weight * 2) |
Supported | Supported |
/ |
Divide | SELECT (avg(a) + avg(b)) / 2 |
Supported | Supported |
Boolean operators¶
| Operator | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
= |
Equal to | WHERE id = 1 |
Supported | Supported |
<>, != |
Not equal to | WHERE status <> 'inactive' |
Supported | Supported |
< |
Less than | WHERE age < 18 |
Supported | Supported |
<= |
Less than or equal to | WHERE price <= 100.0 |
Supported | Supported |
> |
Greater than | WHERE salary > 50000 |
Supported | Supported |
>= |
Greater than or equal to | WHERE score >= 90 |
Supported | Supported |
AND |
Logical AND | WHERE active = true AND verified = true |
Supported | Supported |
OR |
Logical OR | WHERE status = 'new' OR status = 'pending' |
Supported | Supported |
NOT |
Logical NOT | WHERE NOT deleted |
Supported | Supported |
IN |
Value in list | WHERE category IN ('A', 'B', 'C') |
Supported | Supported |
LIKE |
Pattern matching | WHERE name LIKE 'John%' |
Supported | Supported |
EXISTS |
True if subquery returns rows | WHERE EXISTS (SELECT 1 FROM orders WHERE orders.customer_id = customers.id) |
Supported | Supported |
Other expressions¶
| Expression | Description | Example | Furnace | Ontology |
|---|---|---|---|---|
CASE ... WHEN |
Switch expression | SELECT CASE WHEN mark >= 70 THEN 'A' WHEN mark >= 60 THEN 'B' ELSE 'C' END |
Supported | Supported |
INTERVAL |
Time interval literal | SELECT order_date + INTERVAL 30 DAY, WHERE created_at > current_timestamp() - INTERVAL 1 HOUR |
Supported | Not supported |
:::callout{theme="neutral"}
The IF(condition, trueValue, falseValue) function is not supported in Ontology SQL. Use CASE WHEN condition THEN trueValue ELSE falseValue END instead.
:::
Case-when statements¶
CASE WHEN condition THEN result
[WHEN condition THEN result]
[ELSE result]
END
Date/time intervals¶
INTERVAL { yearMonthIntervalQualifier | dayTimeIntervalQualifier }
yearMonthIntervalQualifier
{ YEAR [TO MONTH] |
MONTH }
dayTimeIntervalQualifier
{ DAY [TO { HOUR | MINUTE | SECOND } ] |
HOUR [TO { MINUTE | SECOND } ] |
MINUTE [TO SECOND] |
SECOND }
-- examples
INTERVAL '1' YEAR
INTERVAL '6' MONTH
INTERVAL '25' DAY
INTERVAL '6' YEAR TO MONTH
Data types¶
| Type | Spark equivalent | Furnace | Ontology |
|---|---|---|---|
BOOLEAN |
BOOLEAN |
Supported | Supported |
SHORT |
SHORT |
Supported | Supported |
INT |
INT |
Supported | Supported |
LONG |
LONG |
Supported | Supported |
FLOAT |
FLOAT |
Supported | Supported |
DOUBLE |
DOUBLE |
Supported | Supported |
DATE |
DATE |
Supported | Supported |
TIMESTAMP |
TIMESTAMP |
Supported | Supported |
TIMESTAMP_NTZ |
TIMESTAMP_NTZ |
Supported | Supported |
STRING |
STRING |
Supported | Supported |
BINARY |
BINARY |
Supported | Not supported |
DECIMAL |
DECIMAL |
Supported | Supported |
MAP<key, value> |
MAP<key, value> |
Supported | Not supported |
ARRAY<element_type> |
ARRAY<element_type> |
Supported | Supported |
STRUCT<field1_name: field1_type, field2_name: field2_type, …> |
STRUCT<field1_name: field1_type, field2_name: field2_type, …> |
Supported | Not supported |
CREATE TABLE `/path/to/table` (
col1 BOOLEAN,
col2 SHORT,
col3 INT,
col4 LONG,
col5 FLOAT,
col6 DOUBLE,
col7 DATE,
col8 TIMESTAMP,
col9 TIMESTAMP_NTZ,
col10 STRING,
col11 BINARY,
col12 DECIMAL(10, 2),
col13 MAP<STRING, STRING>,
col14 ARRAY<STRING>,
col15 STRUCT<col1: STRING, col2: STRING>
);
中文翻译¶
SQL 方言¶
Foundry SQL 通常是 Spark SQL ↗ 的一个子集,并遵循 ANSI 标准。
快速参考¶
常用操作¶
-- 创建表
CREATE TABLE `/path/to/table` (id INT, name STRING);
-- 插入数据
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
-- 基本查询
SELECT * FROM `/path/to/table` WHERE id = 1;
-- 表连接
SELECT a.*, b.name
FROM `/path/to/table1` a
LEFT JOIN `/path/to/table2` b ON a.id = b.id;
-- 聚合
SELECT category, COUNT(*), AVG(price)
FROM `/path/to/products`
GROUP BY category
HAVING COUNT(*) > 10;
-- 窗口函数
SELECT name, salary,
RANK() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;
-- CTE(公用表表达式)
WITH summary AS (
SELECT category, SUM(amount) as total
FROM `/path/to/sales`
GROUP BY category
)
SELECT * FROM summary WHERE total > 1000;
常用函数¶
| 类别 | 函数 |
|---|---|
| 聚合 | count(), sum(), avg(), min(), max(), count(DISTINCT col), collect_set() |
| 字符串 | upper(), lower(), trim(), concat(), concat_ws(), contains(), split(), substr() |
| 日期/时间 | current_date(), current_timestamp(), date_add(), date_diff(), to_date(), year(), month(), day() |
| 数学 | abs(), round(), ceil(), floor(), pow(), sqrt(), mod() |
| 数组 | array(), array_contains(), array_size(), array_distinct(), explode(), flatten() |
| 条件 | CASE WHEN ... THEN ... END, coalesce(), nullif(), regexp_like(), regexp_extract() |
| 窗口 | ROW_NUMBER(), RANK(), DENSE_RANK(), LAG(), LEAD() |
表引用¶
-- 按路径引用
SELECT * FROM `/path/to/table`;
-- 按 RID 引用
SELECT * FROM `ri.rid.for.table.1234`;
-- 指定分支
SELECT * FROM `/path/to/table`.branch_master;
关键语法说明¶
- 子查询必须用括号包裹。
- 默认创建的表为 Iceberg 格式,但可以使用
USING <format>语法覆盖。 - 只有 Iceberg 表支持
INSERT、UPDATE和DELETE操作。
创建表¶
CREATE [ OR REPLACE ] TABLE table_identifier [ ( col_name1 col_type1, ... ) ] [ USING table_format ] [ PARTITIONED BY ( expr [, ...] ) ] [ TBLPROPERTIES ( 'key'='value' [, ...] ) ] [ AS select_statement ]
PARTITIONED BY 仅支持 Iceberg 表。TBLPROPERTIES 用于设置 Iceberg 表属性 ↗。
如果未指定,表格式默认为 Iceberg。有效的表格式包括:
- Iceberg:
iceberg - Parquet:
parquet - Avro:
avro
-- 示例
CREATE TABLE `/path/to/table` AS
SELECT *
FROM `/path/to/table`;
CREATE TABLE `/path/to/table` USING parquet AS
SELECT *
FROM `/path/to/table`;
CREATE TABLE `/path/to/table`
(
id INT,
name STRING
);
CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);
CREATE TABLE `/path/to/table` (id INT, ts TIMESTAMP)
PARTITIONED BY (days(ts));
CREATE TABLE `/path/to/table` (id INT)
TBLPROPERTIES ('write.format.default'='parquet');
CREATE TABLE `/path/to/table`
PARTITIONED BY (id)
AS SELECT * FROM `/path/to/source`;
修改表¶
Iceberg 表支持使用 INSERT、UPDATE 和 DELETE 语句进行数据追加。非 Iceberg 表目前不支持修改操作。
插入¶
INSERT [ INTO ] table_identifier { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
-- 示例
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
INSERT INTO `/path/to/table` SELECT id, name FROM `/path/to/table2`;
更新¶
UPDATE table_identifier
SET column_name = value [, ...]
[ WHERE condition ]
-- 示例
UPDATE `/path/to/table`
SET col = 'new value';
UPDATE `/path/to/table`
SET col = 'new value'
WHERE col = 'old value';
删除¶
DELETE FROM table_identifier WHERE condition
DELETE FROM `/path/to/table` WHERE col = 'to delete'
修改元数据¶
Iceberg 表支持使用 ALTER TABLE 语法进行模式和元数据的修改。更多详情请参考 Iceberg Spark DDL 文档 ↗。
列操作¶
ALTER TABLE table_identifier ADD COLUMN col_name col_type [COMMENT 'comment']
ALTER TABLE table_identifier ADD COLUMNS (col_name col_type [COMMENT 'comment'], ...)
ALTER TABLE table_identifier DROP COLUMN col_name
ALTER TABLE table_identifier DROP COLUMNS (col_name, ...)
ALTER TABLE table_identifier RENAME COLUMN old_name TO new_name
ALTER TABLE table_identifier ALTER COLUMN col_name TYPE new_type
ALTER TABLE table_identifier ALTER COLUMN col_name COMMENT 'comment'
ALTER TABLE table_identifier ALTER COLUMN col_name SET NOT NULL
ALTER TABLE table_identifier ALTER COLUMN col_name DROP NOT NULL
-- 示例
ALTER TABLE `/path/to/table` ADD COLUMN new_col STRING;
ALTER TABLE `/path/to/table` ADD COLUMNS (col1 INT, col2 STRING COMMENT 'description');
ALTER TABLE `/path/to/table` DROP COLUMN old_col;
ALTER TABLE `/path/to/table` RENAME COLUMN old_name TO new_name;
ALTER TABLE `/path/to/table` ALTER COLUMN col2 TYPE BIGINT;
分区字段¶
ALTER TABLE table_identifier ADD PARTITION FIELD expr [AS alias]
ALTER TABLE table_identifier DROP PARTITION FIELD expr
ALTER TABLE table_identifier REPLACE PARTITION FIELD old_expr WITH new_expr [AS alias]
-- 示例
ALTER TABLE `/path/to/table` ADD PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` ADD PARTITION FIELD bucket(16, id) AS id_bucket;
ALTER TABLE `/path/to/table` DROP PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` REPLACE PARTITION FIELD days(ts) WITH hours(ts);
写入排序¶
ALTER TABLE table_identifier WRITE ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE DISTRIBUTED BY PARTITION [LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]]
ALTER TABLE table_identifier WRITE UNORDERED
-- 示例
ALTER TABLE `/path/to/table` WRITE ORDERED BY col1, col2 DESC;
ALTER TABLE `/path/to/table` WRITE DISTRIBUTED BY PARTITION LOCALLY ORDERED BY col1;
ALTER TABLE `/path/to/table` WRITE UNORDERED;
标识符字段¶
ALTER TABLE table_identifier SET IDENTIFIER FIELDS col1 [, ...]
ALTER TABLE table_identifier DROP IDENTIFIER FIELDS col1 [, ...]
-- 示例
ALTER TABLE `/path/to/table` SET IDENTIFIER FIELDS col1, col2;
ALTER TABLE `/path/to/table` DROP IDENTIFIER FIELDS col1, col2;
表属性¶
ALTER TABLE table_identifier SET TBLPROPERTIES ('key1'='value1' [, ...])
ALTER TABLE table_identifier UNSET TBLPROPERTIES ('key1' [, ...])
-- 示例
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet');
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet', 'write.parquet.compression-codec'='zstd');
ALTER TABLE `/path/to/table` UNSET TBLPROPERTIES ('write.format.default');
标签¶
ALTER TABLE table_identifier CREATE TAG [IF NOT EXISTS] tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier CREATE OR REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier DROP TAG [IF EXISTS] tag_name
ALTER TABLE table_identifier REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- 示例
ALTER TABLE `/path/to/table` CREATE TAG v1 AS OF VERSION 42;
ALTER TABLE `/path/to/table` CREATE TAG IF NOT EXISTS v1 AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE TAG v2 AS OF VERSION 100;
ALTER TABLE `/path/to/table` DROP TAG IF EXISTS v1;
ALTER TABLE `/path/to/table` REPLACE TAG v1 AS OF VERSION 50 RETAIN 14 DAYS;
分支¶
ALTER TABLE table_identifier CREATE BRANCH [IF NOT EXISTS] branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier CREATE OR REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier DROP BRANCH [IF EXISTS] branch_name
ALTER TABLE table_identifier REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- 示例
ALTER TABLE `/path/to/table` CREATE BRANCH feature_branch;
ALTER TABLE `/path/to/table` CREATE BRANCH IF NOT EXISTS my_branch AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE BRANCH my_branch AS OF VERSION 10;
ALTER TABLE `/path/to/table` DROP BRANCH IF EXISTS old_branch;
ALTER TABLE `/path/to/table` REPLACE BRANCH my_branch AS OF VERSION 5 RETAIN 30 DAYS;
事务块¶
事务块将多个语句组合在一起,作为一个原子作业执行。
BEGIN;
statement1;
statement2;
...
COMMIT;
规则:
BEGIN/COMMIT内的所有语句在同一个作业中一起执行。- 事务块不能嵌套。
- 事务块必须包含至少一个写入语句。
BEGIN没有COMMIT或COMMIT没有BEGIN将导致解析错误。
-- 原子性地创建并填充表
BEGIN;
CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
COMMIT;
-- 作为单个作业向多个表插入数据
BEGIN;
INSERT INTO `/path/to/table1` SELECT * FROM `/path/to/source`;
INSERT INTO `/path/to/table2` SELECT * FROM `/path/to/source`;
COMMIT;
查询组合¶
[ WITH with_query [ , ... ] ]
select_statement
[ { UNION | INTERSECT | EXCEPT } select_statement, ... ]
[ ORDER BY { expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] } ]
[ WINDOW { window_name AS ( window_spec ) [, ...] } ]
[ LIMIT { ALL | expression } ]
WITH 子句(公用表表达式)¶
其中 with_query 为:
cte_name [ ( column_name [, ...] ) ] AS ( select_statement )
-- 示例
WITH sales_summary (cust_id, total_sales, order_count) AS (
SELECT customer_id, SUM(total_amount), COUNT(*)
FROM `/data/orders`
GROUP BY customer_id
)
SELECT * FROM sales_summary WHERE total_sales > 10000;
SELECT 语句¶
其中 select_statement 为:
SELECT [ ALL | DISTINCT ] { [ named_expression | regex_column_names | * ] [, ...] }
FROM { from_item [, ...] }
[ WHERE boolean_expression ]
[ GROUP BY { expression | ROLLUP(...) | CUBE(...) | GROUPING SETS(...) } [ , ... ] ]
[ HAVING boolean_expression ]
[ LIMIT { ALL | expression } [ OFFSET expression ] ]
命名表达式¶
其中 named_expression 为以下之一:
带可选别名的表达式:
expression [ AS alias ]
-- 示例
SELECT count(*) AS total_count FROM `/path/to/table`;
SELECT name, salary * 1.1 AS adjusted_salary FROM `/path/to/employees`;
窗口函数:
window_function() OVER ( window_spec )
window_function() OVER window_name
其中 window_spec 为:
[ PARTITION BY expression [, ...] ]
[ ORDER BY expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] ]
-- 示例
SELECT name, ROW_NUMBER() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;
SELECT category, product_id,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as category_rank
FROM `/path/to/products`;
-- 使用命名窗口
SELECT customer_id, order_date, total,
ROW_NUMBER() OVER w as order_sequence
FROM `/path/to/orders`
WINDOW w AS (PARTITION BY customer_id ORDER BY order_date);
FROM 子句¶
其中 from_item 为以下之一:
表引用¶
表可以通过文件系统路径或资源标识符(RID)进行引用。两者都支持可选的分支规范,用于查询特定的 Iceberg 分支。包含特殊字符的分支名称需要使用反引号进行转义。
{ `/path/to/table`[.branch_<branch name>] | `ri.rid.for.table.1234`[.branch_<branch name>] }
-- 示例
SELECT * FROM `/path/to/table`;
SELECT * FROM `/path/to/table`.branch_master;
SELECT * FROM `/path/to/table` WHERE id = 1;
SELECT * FROM `ri.rid.for.table.1234`;
SELECT * FROM `ri.rid.for.table`.`branch_abc\xyz`;
可以使用 VERSION AS OF 语法进行时间旅行查询,对于 Iceberg 表使用快照 ID,对于 Foundry 数据集使用事务 RID。
-- 示例
SELECT * FROM `/path/to/table` VERSION AS OF `ri.foundry.main.transaction.1234`;
SELECT * FROM `/path/to/table` VERSION AS OF 1234567;
对于 Foundry 数据集,可以使用 VERSION BETWEEN ... AND ... 读取在某个事务范围内添加的行。该查询返回严格在起始事务之后、截至结束事务(含)添加的行。起始边界是排他的,结束边界是包含的。两个边界都必须是事务 RID,而不是快照 ID。
-- 在 txn.1234 之后追加的行,截至并包括 txn.5678
SELECT * FROM `/path/to/table`
VERSION BETWEEN `ri.foundry.main.transaction.1234` AND `ri.foundry.main.transaction.5678`;
VERSION BETWEEN 不支持 Iceberg 表。其他时间旅行语法目前也不支持。
连接¶
relation
{ CROSS | LEFT [ OUTER ] | RIGHT [ OUTER ] | FULL [ OUTER ] }
JOIN relation
ON boolean_expression
-- 示例
SELECT * FROM `/path/to/table` LEFT JOIN `/path/to/table2` ON id = id2;
SELECT * FROM `/path/to/table` RIGHT OUTER JOIN `/path/to/table2` ON id = id2;
子查询¶
( select_statement )
-- 示例
SELECT * FROM (SELECT * FROM `/path/to/table`);
SELECT * FROM (SELECT * FROM `/path/to/table`) WHERE id = 1;
SELECT * FROM (
VALUES (9001, 1001, 1, DATE '2023-06-01', 'credit_card', 1329.98, 'completed',
TIMESTAMP '2023-06-01 11:00:00')
) AS orders(order_id, customer_id, product_id, order_date, payment_method, total, status, created_at);
:::callout{theme="neutral"} 子查询必须用括号包裹。即使在 Spark SQL 中并非总是必需,但在 Foundry 中强制执行此规则。 :::
完整查询示例¶
-- 带连接和过滤的复杂查询
SELECT DISTINCT o.order_id,
c.name,
o.total_amount,
p.category
FROM `/data/orders` o
LEFT JOIN `/data/customers` c ON o.customer_id = c.id
CROSS JOIN (SELECT product_id, category
FROM `/data/products`
WHERE category = 'Electronics') p ON o.product_id = p.product_id
WHERE o.order_date > DATE '2024-01-01'
AND o.total_amount > 1000.0
ORDER BY o.total_amount DESC
LIMIT 100;
-- 带 HAVING 子句的聚合
SELECT c.name,
COUNT(o.order_id) as order_count,
SUM(o.total_amount) as total_spent
FROM `/data/customers` c
LEFT JOIN `/data/orders` o ON c.id = o.customer_id
GROUP BY c.name
HAVING COUNT(o.order_id) > 0;
-- 带多个连接的 UNION 查询
SELECT emp_id, name, dept, 'Active' AS status
FROM `/hr/employees` e
LEFT OUTER JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = true
UNION
SELECT emp_id, name, dept, 'Inactive' AS status
FROM (SELECT *
FROM `/hr/employees`
WHERE termination_date < current_timestamp()) e
RIGHT JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = false
ORDER BY dept ASC, status DESC NULLS LAST;
SQL 提示¶
SQL 提示为查询引擎提供额外的指令。提示使用以下语法指定:
/*+ hint1, hint2(opts...) */
目前支持以下提示:
adhoc- 将写入查询作为临时查询运行。新查询作为单独的目标添加到表上的任何现有目标旁边,保留现有的默认目标。直接重建表仍将使用原始默认目标,而不是临时目标。
/*+ adhoc */
CREATE OR REPLACE TABLE `/path/to/table` AS SELECT * FROM `/path/to/source`;
Iceberg 引用¶
Foundry SQL 支持 Iceberg 引用来查询元数据。请查阅 官方文档 ↗。
-- 示例
SELECT * FROM `/path/to/table/`.files;
SELECT * FROM `/path/to/table/`.snapshots;
内省¶
使用 SHOW FUNCTIONS 列出 Foundry SQL 支持的函数。该语句返回三列:name、kind(值为 scalar、aggregate 或 window)和 signature。可选的 LIKE 模式按函数名称过滤,其中 % 匹配任意字符序列,_ 匹配单个字符。
SHOW FUNCTIONS;
SHOW FUNCTIONS LIKE 'abs%';
函数¶
函数可用于以更复杂的方式转换数据。有关以下每个函数的详细信息,请参考 Spark SQL 文档 ↗。
数组函数¶
| 函数 | 描述 | 示例 | Furnace | Ontology |
|---|---|---|---|---|
array(expr, ...) |
使用给定元素创建数组 | SELECT array(1, 2, 3) |
支持 | 支持 |
array_distinct(array) |
移除数组中的重复值 | SELECT array_distinct(tags) FROM table |
支持 | 不支持 |
array_intersect(array1, array2) |
两个数组中都存在的元素(无重复) | SELECT array_intersect(arr1, arr2) FROM table |
支持 | 不支持 |
array_union(array1, array2) |
任一数组中存在的元素(无重复) | SELECT array_union(arr1, arr2) FROM table |
支持 | 不支持 |
array_except(array1, array2) |
在 array1 中但不在 array2 中的元素 | SELECT array_except(arr1, arr2) FROM table |
支持 | 不支持 |
array_join(array, delimiter [, nullReplacement]) |
使用分隔符连接数组元素 | SELECT array_join(tags, ',') FROM table |
支持 | 不支持 |
array_max(array) |
数组中的最大值 | SELECT array_max(ARRAY(1, 5, 3)) |
支持 | 不支持 |
array_min(array) |
数组中的最小值 | SELECT array_min(ARRAY(1, 5, 3)) |
支持 | 不支持 |
array_position(array, element) |
第一次出现的位置(从1开始,未找到返回0) | SELECT array_position(tags, 'item') FROM table |
支持 | 不支持 |
array_remove(array, element) |
移除所有匹配的元素 | SELECT array_remove(tags, 'old') FROM table |
支持 | 不支持 |
array_size(array) |
数组中的元素数量 | SELECT array_size(tags) FROM table |
支持 | 不支持 |
array_contains(array, value) |
如果数组包含该值则返回 true | SELECT array_contains(tags, 'new') FROM table |
支持 | 不支持 |
array_repeat(element, count) |
创建元素重复 count 次的数组 | SELECT array_repeat('x', 5) |
支持 | 不支持 |
arrays_overlap(array1, array2) |
如果数组有共同的非空元素则返回 true | SELECT arrays_overlap(arr1, arr2) FROM table |
支持 | 不支持 |
flatten(arrayOfArrays) |
将嵌套数组展平为单个数组 | SELECT flatten(nested_arrays) FROM table |
支持 | 不支持 |
sequence(start, stop, step) |
生成整数序列 | SELECT sequence(1, 10, 2) |
支持 | 不支持 |
shuffle(array) |
数组的随机排列 | SELECT shuffle(tags) FROM table |
支持 | 不支持 |
slice(array, start, length) |
从 start 开始长度为 length 的数组子集 | SELECT slice(tags, 1, 2) FROM table |
支持 | 不支持 |
sort_array(array[, ascendingOrder]) |
按升序(默认)或降序排序数组 | SELECT sort_array(tags) FROM table |
支持 | 不支持 |
array_sort(array) |
按升序排序数组,null 值排在最后 | SELECT array_sort(tags) FROM table |
支持 | 不支持 |
get(array, index) |
返回给定索引处的元素(从0开始) | SELECT get(tags, 0) FROM table |
支持 | 不支持 |
element_at(array, index) |
返回给定索引处的元素(从1开始;负数从末尾计数) | SELECT element_at(tags, 1) FROM table |
支持 | 不支持 |
size(array) |
数组中的元素数量(也接受 map) | SELECT size(tags) FROM table |
支持 | 不支持 |
cardinality(array) |
数组中的元素数量(也接受 map) | SELECT cardinality(tags) FROM table |
支持 | 不支持 |
explode(array) |
为数组中的每个元素生成新行 | SELECT explode(tags) FROM table |
支持 | 不支持 |
Map 函数¶
| 函数 | 描述 | 示例 | Furnace | Ontology |
|---|---|---|---|---|
map(key1, value1, key2, value2, ...) |
从键值对创建 map | SELECT map('a', 1, 'b', 2) |
支持 | 不支持 |
map_keys(map) |
返回 map 的键数组 | SELECT map_keys(attributes) FROM table |
支持 | 不支持 |
map_values(map) |
返回 map 的值数组 | SELECT map_values(attributes) FROM table |
支持 | 不支持 |
element_at(map, key) |
返回给定键的值 | SELECT element_at(attributes, 'color') FROM table |
支持 | 不支持 |
size(map) |
map 中的条目数(也接受数组) | SELECT size(attributes) FROM table |
支持 | 不支持 |
cardinality(map) |
map 中的条目数(也接受数组) | SELECT cardinality(attributes) FROM table |
支持 | 不支持 |
Struct 函数¶
| 函数 | 描述 | 示例 | Furnace | Ontology |
|---|---|---|---|---|
struct(expr1, expr2, ...) |
从表达式创建 struct | SELECT struct('red', 'large', 2.0) |
支持 | 不支持 |
日期和时间戳函数¶
| 函数 | 描述 | 示例 | Furnace | Ontology |
|---|---|---|---|---|
current_timestamp() |
返回当前时间戳 | SELECT current_timestamp() |
支持 | 支持 |
current_date() |
返回当前日期 | SELECT current_date() |
支持 | 支持 |
now() |
返回当前时间戳(current_timestamp 的别名) |
SELECT now() |
支持 | 不支持 |
add_months(date, num_months) |
给日期添加月数 | add_months(order_date, 3) |
支持 | 不支持 |
date_add(expr, num_days) |
给日期添加天数 | date_add(order_date, 7) |
支持 | 支持 |
date_format(expr, format) |
将日期或时间戳格式化为字符串 | SELECT date_format(TIMESTAMP '2024-01-15 10:30:00', 'yyyy-MM-dd') |
支持 | 不支持 |
date_sub(expr, num_days) |
从日期减去天数 | date_sub(order_date, 7) |
支持 | 支持 |
from_unixtime(expr [, format]) |
将 Unix 时间戳转换为时间戳 | from_unixtime(1609459200) |
支持 | 不支持 |
unix_timestamp([expr] [, format]) |
将时间戳转换为 Unix 时间戳 | unix_timestamp(current_timestamp()) |
支持 | 不支持 |
day(date) |
从日期中提取月份中的日 | SELECT day(order_date) FROM table |
支持 | 支持 |
hour(timestamp) |
从时间戳中提取小时 | SELECT hour(order_timestamp) FROM table |
支持 | 不支持 |
minute(timestamp) |
从时间戳中提取分钟 | SELECT minute(order_timestamp) FROM table |
支持 | 不支持 |
month(date) |
从日期中提取月份 | SELECT month(order_date) FROM table |
支持 | 支持 |
quarter(date) |
从日期中提取季度 | SELECT quarter(order_date) FROM table |
支持 | 支持 |
second(timestamp) |
从时间戳中提取秒 | SELECT second(order_timestamp) FROM table |
支持 | 不支持 |
year(date) |
从日期中提取年份 | SELECT year(order_date) FROM table |
支持 | 支持 |
date_trunc(unit, timestamp) |
将时间戳截断到指定单位 | SELECT date_trunc('day', current_timestamp()) |
支持 | 不支持 |
trunc(date, fmt) |
将日期截断到 YEAR、QUARTER、MONTH 或 WEEK 粒度 |
SELECT trunc(order_date, 'MM') |
支持 | 不支持 |
to_date(str [, format]) |
将字符串转换为日期 | SELECT to_date('2024-01-15') |
支持 | 不支持 |
to_timestamp(expr [, format]) |
转换为时间戳 | SELECT to_timestamp('2024-01-15 10:30:00') |
支持 | 不支持 |
timestamp(expr) |
转换为时间戳的简写形式 | SELECT timestamp('2024-01-15 10:30:00') |
支持 | 不支持 |
from_utc_timestamp(ts, tz) |
在给定时区中解释 UTC 时间戳 | SELECT from_utc_timestamp(ts, 'America/Los_Angeles') |
支持 | 不支持 |
to_utc_timestamp(ts, tz) |
将给定时区的时间戳转换为 UTC | SELECT to_utc_timestamp(ts, 'America/Los_Angeles') |
支持 | 不支持 |
dayofmonth(date) |
从日期中提取月份中的日(day 的别名) |
SELECT dayofmonth(order_date) FROM table |
支持 | 不支持 |
dayofweek(date) |
星期几(1 = 星期日,7 = 星期六) | SELECT dayofweek(order_date) FROM table |
支持 | 不支持 |
weekday(date) |
星期几(0 = 星期一,6 = 星期日) | SELECT weekday(order_date) FROM table |
支持 | 不支持 |
dayofyear(date) |
一年中的第几天(1 到 366) | SELECT dayofyear(order_date) FROM table |
支持 | 不支持 |
weekofyear(date) |
ISO 周数 | SELECT weekofyear(order_date) FROM table |
支持 | 不支持 |
last_day(date) |
包含该日期的月份的最后一天 | SELECT last_day(order_date) FROM table |
支持 | 不支持 |
next_day(start_date, day_of_week) |
start_date 之后第一个指定星期几的日期 |
SELECT next_day(order_date, 'Mon') FROM table |
支持 | 不支持 |
date_diff(date1, date2) |
两个日期相减 | SELECT date_diff('2020-01-01', '2020-01-02') |
支持 | 支持 |
months_between(ts1, ts2 [, roundOff]) |
两个时间戳之间的月份数(小数)(除非 roundOff 为 false,否则四舍五入到八位) |
SELECT months_between('1997-02-28 10:30:00', '1996-10-30') |
支持 | 不支持 |
timestamp_seconds(seconds) |
从 UTC 纪元以来的秒数创建时间戳(接受小数值) | SELECT timestamp_seconds(1230219000) |
支持 | 不支持 |
timestamp_millis(millis) |
从 UTC 纪元以来的毫秒数创建时间戳 | SELECT timestamp_millis(1230219000123) |
支持 | 不支持 |
timestamp_micros(micros) |
从 UTC 纪元以来的微秒数创建时间戳 | SELECT timestamp_micros(1230219000123123) |
支持 | 不支持 |
unix_seconds(timestamp) |
自 1970-01-01 UTC 以来的秒数(截断更高精度) | SELECT unix_seconds(current_timestamp()) |
支持 | 不支持 |
unix_millis(timestamp) |
自 1970-01-01 UTC 以来的毫秒数(截断更高精度) | SELECT unix_millis(current_timestamp()) |
支持 | 不支持 |
unix_micros(timestamp) |
自 1970-01-01 UTC 以来的微秒数 | SELECT unix_micros(current_timestamp()) |
支持 | 不支持 |
数学函数¶
| 函数 | 描述 | 示例 | Furnace | Ontology |
|---|---|---|---|---|
abs(x) |
绝对值 | SELECT abs(-5) |
支持 | 支持 |
acos(x) |
反余弦 | SELECT acos(0.5) |
支持 | 不支持 |
asin(x) |
反正弦 | SELECT asin(0.5) |
支持 | 不支持 |
atan(x) |
反正切 | SELECT atan(1) |
支持 | 不支持 |
atan2(y, x) |
y/x 的反正切 | SELECT atan2(1, 1) |
支持 | 不支持 |
cbrt(x) |
立方根 | SELECT cbrt(27) |
支持 | 不支持 |
ceil(x) |
向上取整到最接近的整数 | SELECT ceil(3.7) |
支持 | 支持 |
ceiling(x) |
向上取整到最接近的整数 | SELECT ceiling(3.7) |
支持 | 支持 |
cos(x) |
余弦 | SELECT cos(pi()) |
支持 |