跳转至

SQL dialect(SQL 方言)

Foundry SQL is generally a subset of Spark SQL ↗ with ANSI compliance.

Quick reference

Common operations

-- Create a table
CREATE TABLE `/path/to/table` (id INT, name STRING);

-- Insert data
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');

-- Basic query
SELECT * FROM `/path/to/table` WHERE id = 1;

-- Join tables
SELECT a.*, b.name
FROM `/path/to/table1` a
LEFT JOIN `/path/to/table2` b ON a.id = b.id;

-- Aggregate
SELECT category, COUNT(*), AVG(price)
FROM `/path/to/products`
GROUP BY category
HAVING COUNT(*) > 10;

-- Window function
SELECT name, salary,
       RANK() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;

-- CTE (Common Table Expression)
WITH summary AS (
  SELECT category, SUM(amount) as total
  FROM `/path/to/sales`
  GROUP BY category
)
SELECT * FROM summary WHERE total > 1000;

Commonly used functions

Category Functions
Aggregation count(), sum(), avg(), min(), max(), count(DISTINCT col), collect_set()
String upper(), lower(), trim(), concat(), concat_ws(), contains(), split(), substr()
Date/Time current_date(), current_timestamp(), date_add(), date_diff(), to_date(), year(), month(), day()
Math abs(), round(), ceil(), floor(), pow(), sqrt(), mod()
Array array(), array_contains(), array_size(), array_distinct(), explode(), flatten()
Conditional CASE WHEN ... THEN ... END, coalesce(), nullif(), regexp_like(), regexp_extract()
Window ROW_NUMBER(), RANK(), DENSE_RANK(), LAG(), LEAD()

Table references

-- By path
SELECT * FROM `/path/to/table`;

-- By RID
SELECT * FROM `ri.rid.for.table.1234`;

-- With branch
SELECT * FROM `/path/to/table`.branch_master;

Key syntax notes

  • Subqueries must be wrapped in parentheses.
  • Created tables are Iceberg by default but can be overridden with USING <format> syntax.
  • Only Iceberg tables support INSERT, UPDATE, and DELETE.

Table creation

CREATE [ OR REPLACE ] TABLE table_identifier [ ( col_name1 col_type1, ... ) ] [ USING table_format ] [ PARTITIONED BY ( expr [, ...] ) ] [ TBLPROPERTIES ( 'key'='value' [, ...] ) ] [ AS select_statement ]

PARTITIONED BY is only supported with Iceberg tables. TBLPROPERTIES sets Iceberg table properties ↗.

If not specified, the table format will be Iceberg. Valid table formats include:

  • Iceberg: iceberg
  • Parquet: parquet
  • Avro: avro
-- examples
CREATE TABLE `/path/to/table` AS
SELECT *
FROM `/path/to/table`;

CREATE TABLE `/path/to/table` USING parquet AS
SELECT *
FROM `/path/to/table`;

CREATE TABLE `/path/to/table`
(
    id   INT,
    name STRING
);

CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);

CREATE TABLE `/path/to/table` (id INT, ts TIMESTAMP)
    PARTITIONED BY (days(ts));

CREATE TABLE `/path/to/table` (id INT)
    TBLPROPERTIES ('write.format.default'='parquet');

CREATE TABLE `/path/to/table`
    PARTITIONED BY (id)
    AS SELECT * FROM `/path/to/source`;

Table alteration

Iceberg tables support data appends with INSERT, UPDATE, and DELETE statements. Non-Iceberg tables do not currently support table alteration.

Insert

INSERT [ INTO ] table_identifier { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
-- examples
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');

INSERT INTO `/path/to/table` SELECT id, name FROM `/path/to/table2`;

Update

UPDATE table_identifier
SET column_name = value [, ...]
[ WHERE condition ]
-- examples
UPDATE `/path/to/table`
SET col = 'new value';

UPDATE `/path/to/table`
SET col = 'new value'
WHERE col = 'old value';

Delete

DELETE FROM table_identifier WHERE condition
DELETE FROM `/path/to/table` WHERE col = 'to delete'

Alter metadata

Iceberg tables support schema and metadata changes with ALTER TABLE syntax. Refer to the Iceberg Spark DDL documentation ↗ for additional details.

Column operations

ALTER TABLE table_identifier ADD COLUMN col_name col_type [COMMENT 'comment']
ALTER TABLE table_identifier ADD COLUMNS (col_name col_type [COMMENT 'comment'], ...)
ALTER TABLE table_identifier DROP COLUMN col_name
ALTER TABLE table_identifier DROP COLUMNS (col_name, ...)
ALTER TABLE table_identifier RENAME COLUMN old_name TO new_name
ALTER TABLE table_identifier ALTER COLUMN col_name TYPE new_type
ALTER TABLE table_identifier ALTER COLUMN col_name COMMENT 'comment'
ALTER TABLE table_identifier ALTER COLUMN col_name SET NOT NULL
ALTER TABLE table_identifier ALTER COLUMN col_name DROP NOT NULL
-- examples
ALTER TABLE `/path/to/table` ADD COLUMN new_col STRING;
ALTER TABLE `/path/to/table` ADD COLUMNS (col1 INT, col2 STRING COMMENT 'description');
ALTER TABLE `/path/to/table` DROP COLUMN old_col;
ALTER TABLE `/path/to/table` RENAME COLUMN old_name TO new_name;
ALTER TABLE `/path/to/table` ALTER COLUMN col2 TYPE BIGINT;

Partition fields

ALTER TABLE table_identifier ADD PARTITION FIELD expr [AS alias]
ALTER TABLE table_identifier DROP PARTITION FIELD expr
ALTER TABLE table_identifier REPLACE PARTITION FIELD old_expr WITH new_expr [AS alias]
-- examples
ALTER TABLE `/path/to/table` ADD PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` ADD PARTITION FIELD bucket(16, id) AS id_bucket;
ALTER TABLE `/path/to/table` DROP PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` REPLACE PARTITION FIELD days(ts) WITH hours(ts);

Write ordering

ALTER TABLE table_identifier WRITE ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE DISTRIBUTED BY PARTITION [LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]]
ALTER TABLE table_identifier WRITE UNORDERED
-- examples
ALTER TABLE `/path/to/table` WRITE ORDERED BY col1, col2 DESC;
ALTER TABLE `/path/to/table` WRITE DISTRIBUTED BY PARTITION LOCALLY ORDERED BY col1;
ALTER TABLE `/path/to/table` WRITE UNORDERED;

Identifier fields

ALTER TABLE table_identifier SET IDENTIFIER FIELDS col1 [, ...]
ALTER TABLE table_identifier DROP IDENTIFIER FIELDS col1 [, ...]
-- examples
ALTER TABLE `/path/to/table` SET IDENTIFIER FIELDS col1, col2;
ALTER TABLE `/path/to/table` DROP IDENTIFIER FIELDS col1, col2;

Table properties

ALTER TABLE table_identifier SET TBLPROPERTIES ('key1'='value1' [, ...])
ALTER TABLE table_identifier UNSET TBLPROPERTIES ('key1' [, ...])
-- examples
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet');
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet', 'write.parquet.compression-codec'='zstd');
ALTER TABLE `/path/to/table` UNSET TBLPROPERTIES ('write.format.default');

Tags

ALTER TABLE table_identifier CREATE TAG [IF NOT EXISTS] tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier CREATE OR REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier DROP TAG [IF EXISTS] tag_name
ALTER TABLE table_identifier REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- examples
ALTER TABLE `/path/to/table` CREATE TAG v1 AS OF VERSION 42;
ALTER TABLE `/path/to/table` CREATE TAG IF NOT EXISTS v1 AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE TAG v2 AS OF VERSION 100;
ALTER TABLE `/path/to/table` DROP TAG IF EXISTS v1;
ALTER TABLE `/path/to/table` REPLACE TAG v1 AS OF VERSION 50 RETAIN 14 DAYS;

Branches

ALTER TABLE table_identifier CREATE BRANCH [IF NOT EXISTS] branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier CREATE OR REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier DROP BRANCH [IF EXISTS] branch_name
ALTER TABLE table_identifier REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- examples
ALTER TABLE `/path/to/table` CREATE BRANCH feature_branch;
ALTER TABLE `/path/to/table` CREATE BRANCH IF NOT EXISTS my_branch AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE BRANCH my_branch AS OF VERSION 10;
ALTER TABLE `/path/to/table` DROP BRANCH IF EXISTS old_branch;
ALTER TABLE `/path/to/table` REPLACE BRANCH my_branch AS OF VERSION 5 RETAIN 30 DAYS;

Transaction blocks

Transaction blocks group multiple statements to execute as a single atomic job.

BEGIN;
statement1;
statement2;
...
COMMIT;

Rules:

  • All statements within BEGIN/COMMIT execute together in a single job.
  • Transaction blocks cannot be nested.
  • A transaction block must contain at least one write statement.
  • BEGIN without COMMIT or COMMIT without BEGIN is a parse error.
-- Create and populate a table atomically
BEGIN;
CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
COMMIT;

-- Multiple inserts into separate tables as a single job
BEGIN;
INSERT INTO `/path/to/table1` SELECT * FROM `/path/to/source`;
INSERT INTO `/path/to/table2` SELECT * FROM `/path/to/source`;
COMMIT;

Query composition

[ WITH with_query [ , ... ] ]
select_statement
  [ { UNION | INTERSECT | EXCEPT } select_statement, ... ]
  [ ORDER BY { expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] } ]
  [ WINDOW { window_name AS ( window_spec ) [, ...] } ]
  [ LIMIT { ALL | expression } ]

WITH clause (common table expressions)

Where with_query is:

cte_name [ ( column_name [, ...] ) ] AS ( select_statement )
-- examples
WITH sales_summary (cust_id, total_sales, order_count) AS (
  SELECT customer_id, SUM(total_amount), COUNT(*)
  FROM `/data/orders`
  GROUP BY customer_id
)
SELECT * FROM sales_summary WHERE total_sales > 10000;

SELECT statement

Where select_statement is:

SELECT [ ALL | DISTINCT ] { [ named_expression | regex_column_names | * ] [, ...] }
  FROM { from_item [, ...] }
  [ WHERE boolean_expression ]
  [ GROUP BY { expression | ROLLUP(...) | CUBE(...) | GROUPING SETS(...) } [ , ... ] ]
  [ HAVING boolean_expression ]
  [ LIMIT { ALL | expression } [ OFFSET expression ] ]

Named expressions

Where named_expression is one of:

Expression with optional alias:

expression [ AS alias ]
-- examples
SELECT count(*) AS total_count FROM `/path/to/table`;

SELECT name, salary * 1.1 AS adjusted_salary FROM `/path/to/employees`;

Window function:

window_function() OVER ( window_spec )
window_function() OVER window_name

Where window_spec is:

[ PARTITION BY expression [, ...] ]
[ ORDER BY expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] ]
-- examples
SELECT name, ROW_NUMBER() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;

SELECT category, product_id,
       ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as category_rank
FROM `/path/to/products`;

-- Using named window
SELECT customer_id, order_date, total,
       ROW_NUMBER() OVER w as order_sequence
FROM `/path/to/orders`
WINDOW w AS (PARTITION BY customer_id ORDER BY order_date);

FROM clause

Where from_item is one of:

Table reference

Tables can be referenced by filesystem path or resource identifier (RID). Both support optional branch specifications for querying specific Iceberg branches. Branch names that contain special characters need to be escaped with backticks.

{ `/path/to/table`[.branch_<branch name>] | `ri.rid.for.table.1234`[.branch_<branch name>] }
-- examples
SELECT * FROM `/path/to/table`;

SELECT * FROM `/path/to/table`.branch_master;

SELECT * FROM `/path/to/table` WHERE id = 1;

SELECT * FROM `ri.rid.for.table.1234`;

SELECT * FROM `ri.rid.for.table`.`branch_abc\xyz`;

Time travel queries to a specific version are possible with the VERSION AS OF syntax using a snapshot ID for Iceberg tables and a transaction RID for Foundry datasets.

-- Examples
SELECT * FROM `/path/to/table` VERSION AS OF `ri.foundry.main.transaction.1234`;

SELECT * FROM `/path/to/table` VERSION AS OF 1234567;

For Foundry datasets, you can read the rows added within a range of transactions with VERSION BETWEEN ... AND .... The query returns rows added strictly after the start transaction up through the end transaction. The start bound is exclusive and the end bound is inclusive. Both bounds must be transaction RIDs, not snapshot IDs.

-- Rows appended after txn.1234, up to and including txn.5678
SELECT * FROM `/path/to/table`
    VERSION BETWEEN `ri.foundry.main.transaction.1234` AND `ri.foundry.main.transaction.5678`;

VERSION BETWEEN is not supported on Iceberg tables. Other time travel syntax is not currently supported.

Join

relation
  { CROSS | LEFT [ OUTER ] | RIGHT [ OUTER ] | FULL [ OUTER ] }
  JOIN relation
  ON boolean_expression
-- examples
SELECT * FROM `/path/to/table` LEFT JOIN `/path/to/table2` ON id = id2;

SELECT * FROM `/path/to/table` RIGHT OUTER JOIN `/path/to/table2` ON id = id2;

Subquery

( select_statement )
-- examples
SELECT * FROM (SELECT * FROM `/path/to/table`);

SELECT * FROM (SELECT * FROM `/path/to/table`) WHERE id = 1;

SELECT * FROM (
  VALUES (9001, 1001, 1, DATE '2023-06-01', 'credit_card', 1329.98, 'completed',
          TIMESTAMP '2023-06-01 11:00:00')
) AS orders(order_id, customer_id, product_id, order_date, payment_method, total, status, created_at);

:::callout{theme="neutral"} Subqueries must be wrapped in parentheses. This is enforced in Foundry even though it is not always required in Spark SQL. :::

Complete query examples

-- Complex query with joins and filtering
SELECT DISTINCT o.order_id,
                c.name,
                o.total_amount,
                p.category
FROM `/data/orders` o
         LEFT JOIN `/data/customers` c ON o.customer_id = c.id
         CROSS JOIN (SELECT product_id, category
                     FROM `/data/products`
                     WHERE category = 'Electronics') p ON o.product_id = p.product_id
WHERE o.order_date > DATE '2024-01-01'
  AND o.total_amount > 1000.0
ORDER BY o.total_amount DESC
LIMIT 100;

-- Aggregation with HAVING clause
SELECT c.name,
       COUNT(o.order_id) as order_count,
       SUM(o.total_amount) as total_spent
FROM `/data/customers` c
       LEFT JOIN `/data/orders` o ON c.id = o.customer_id
GROUP BY c.name
HAVING COUNT(o.order_id) > 0;

-- UNION query with multiple joins
SELECT emp_id, name, dept, 'Active' AS status
FROM `/hr/employees` e
         LEFT OUTER JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = true
UNION
SELECT emp_id, name, dept, 'Inactive' AS status
FROM (SELECT *
      FROM `/hr/employees`
      WHERE termination_date < current_timestamp()) e
         RIGHT JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = false
ORDER BY dept ASC, status DESC NULLS LAST;

SQL hints

SQL hints provide additional directives to the query engine. Hints are specified using the following syntax:

/*+ hint1, hint2(opts...) */

The following hints are currently supported:

  • adhoc - Run a write query as an adhoc query. The new query is added as a separate target alongside any existing targets on the table, preserving the existing default target. A direct rebuild of the table will still use the original default target, not the adhoc one.
/*+ adhoc */
CREATE OR REPLACE TABLE `/path/to/table` AS SELECT * FROM `/path/to/source`;

Iceberg references

Foundry SQL supports Iceberg references to query metadata. Review the official documentation ↗.

-- examples
SELECT * FROM `/path/to/table/`.files;

SELECT * FROM `/path/to/table/`.snapshots;

Introspection

Use SHOW FUNCTIONS to list the functions supported by Foundry SQL. The statement returns three columns: name, kind (one of scalar, aggregate, or window), and signature. An optional LIKE pattern filters by function name, where % matches any sequence of characters and _ matches a single character.

SHOW FUNCTIONS;
SHOW FUNCTIONS LIKE 'abs%';

Functions

Functions are available to transforms data in more complex ways. Refer to the Spark SQL documentation ↗ for detailed information on each function below.

Array functions

Function Description Example Furnace Ontology
array(expr, ...) Create an array with given elements SELECT array(1, 2, 3) Supported Supported
array_distinct(array) Remove duplicate values from array SELECT array_distinct(tags) FROM table Supported Not supported
array_intersect(array1, array2) Elements in both arrays without duplicates SELECT array_intersect(arr1, arr2) FROM table Supported Not supported
array_union(array1, array2) Elements in either array without duplicates SELECT array_union(arr1, arr2) FROM table Supported Not supported
array_except(array1, array2) Elements in array1 but not in array2 SELECT array_except(arr1, arr2) FROM table Supported Not supported
array_join(array, delimiter [, nullReplacement]) Concatenate array elements with delimiter SELECT array_join(tags, ',') FROM table Supported Not supported
array_max(array) Maximum value in array SELECT array_max(ARRAY(1, 5, 3)) Supported Not supported
array_min(array) Minimum value in array SELECT array_min(ARRAY(1, 5, 3)) Supported Not supported
array_position(array, element) Position of first occurrence (1-based, 0 if not found) SELECT array_position(tags, 'item') FROM table Supported Not supported
array_remove(array, element) Remove all matching elements SELECT array_remove(tags, 'old') FROM table Supported Not supported
array_size(array) Number of elements in array SELECT array_size(tags) FROM table Supported Not supported
array_contains(array, value) True if array contains value SELECT array_contains(tags, 'new') FROM table Supported Not supported
array_repeat(element, count) Create array with element repeated count times SELECT array_repeat('x', 5) Supported Not supported
arrays_overlap(array1, array2) True if arrays have common non-null elements SELECT arrays_overlap(arr1, arr2) FROM table Supported Not supported
flatten(arrayOfArrays) Flatten nested arrays into single array SELECT flatten(nested_arrays) FROM table Supported Not supported
sequence(start, stop, step) Generate sequence of integers SELECT sequence(1, 10, 2) Supported Not supported
shuffle(array) Random permutation of array SELECT shuffle(tags) FROM table Supported Not supported
slice(array, start, length) Subset of array from start with length SELECT slice(tags, 1, 2) FROM table Supported Not supported
sort_array(array[, ascendingOrder]) Sort array in ascending (default) or descending order SELECT sort_array(tags) FROM table Supported Not supported
array_sort(array) Sort array in ascending order with nulls last SELECT array_sort(tags) FROM table Supported Not supported
get(array, index) Returns element at given index (0-based) SELECT get(tags, 0) FROM table Supported Not supported
element_at(array, index) Returns element at given index (1-based; negative counts from the end) SELECT element_at(tags, 1) FROM table Supported Not supported
size(array) Number of elements in array (also accepts a map) SELECT size(tags) FROM table Supported Not supported
cardinality(array) Number of elements in array (also accepts a map) SELECT cardinality(tags) FROM table Supported Not supported
explode(array) Generates a new row for each element in the array SELECT explode(tags) FROM table Supported Not supported

Map functions

Function Description Example Furnace Ontology
map(key1, value1, key2, value2, ...) Create a map from key-value pairs SELECT map('a', 1, 'b', 2) Supported Not supported
map_keys(map) Returns an array of the map's keys SELECT map_keys(attributes) FROM table Supported Not supported
map_values(map) Returns an array of the map's values SELECT map_values(attributes) FROM table Supported Not supported
element_at(map, key) Returns the value for the given key SELECT element_at(attributes, 'color') FROM table Supported Not supported
size(map) Number of entries in map (also accepts an array) SELECT size(attributes) FROM table Supported Not supported
cardinality(map) Number of entries in map (also accepts an array) SELECT cardinality(attributes) FROM table Supported Not supported

Struct functions

Function Description Example Furnace Ontology
struct(expr1, expr2, ...) Create a struct from expressions SELECT struct('red', 'large', 2.0) Supported Not supported

Date and timestamp functions

Function Description Example Furnace Ontology
current_timestamp() Returns current timestamp SELECT current_timestamp() Supported Supported
current_date() Returns current date SELECT current_date() Supported Supported
now() Returns current timestamp (alias for current_timestamp) SELECT now() Supported Not supported
add_months(date, num_months) Add months to date add_months(order_date, 3) Supported Not supported
date_add(expr, num_days) Add days to date date_add(order_date, 7) Supported Supported
date_format(expr, format) Format date or timestamp as a string SELECT date_format(TIMESTAMP '2024-01-15 10:30:00', 'yyyy-MM-dd') Supported Not supported
date_sub(expr, num_days) Subtract days from date date_sub(order_date, 7) Supported Supported
from_unixtime(expr [, format]) Convert Unix timestamp to timestamp from_unixtime(1609459200) Supported Not supported
unix_timestamp([expr] [, format]) Convert timestamp to Unix timestamp unix_timestamp(current_timestamp()) Supported Not supported
day(date) Extract day of month from date SELECT day(order_date) FROM table Supported Supported
hour(timestamp) Extract hour from timestamp SELECT hour(order_timestamp) FROM table Supported Not supported
minute(timestamp) Extract minute from timestamp SELECT minute(order_timestamp) FROM table Supported Not supported
month(date) Extract month from date SELECT month(order_date) FROM table Supported Supported
quarter(date) Extract quarter from date SELECT quarter(order_date) FROM table Supported Supported
second(timestamp) Extract second from timestamp SELECT second(order_timestamp) FROM table Supported Not supported
year(date) Extract year from date SELECT year(order_date) FROM table Supported Supported
date_trunc(unit, timestamp) Truncate timestamp to specified unit SELECT date_trunc('day', current_timestamp()) Supported Not supported
trunc(date, fmt) Truncate date to YEAR, QUARTER, MONTH, or WEEK granularity SELECT trunc(order_date, 'MM') Supported Not supported
to_date(str [, format]) Convert string to date SELECT to_date('2024-01-15') Supported Not supported
to_timestamp(expr [, format]) Convert to timestamp SELECT to_timestamp('2024-01-15 10:30:00') Supported Not supported
timestamp(expr) Short-form cast to timestamp SELECT timestamp('2024-01-15 10:30:00') Supported Not supported
from_utc_timestamp(ts, tz) Interpret a UTC timestamp in the given timezone SELECT from_utc_timestamp(ts, 'America/Los_Angeles') Supported Not supported
to_utc_timestamp(ts, tz) Convert a timestamp in the given timezone to UTC SELECT to_utc_timestamp(ts, 'America/Los_Angeles') Supported Not supported
dayofmonth(date) Extract day of month from date (alias for day) SELECT dayofmonth(order_date) FROM table Supported Not supported
dayofweek(date) Day of week (1 = Sunday, 7 = Saturday) SELECT dayofweek(order_date) FROM table Supported Not supported
weekday(date) Day of week (0 = Monday, 6 = Sunday) SELECT weekday(order_date) FROM table Supported Not supported
dayofyear(date) Day of year (1 to 366) SELECT dayofyear(order_date) FROM table Supported Not supported
weekofyear(date) ISO week of year SELECT weekofyear(order_date) FROM table Supported Not supported
last_day(date) Last day of the month containing date SELECT last_day(order_date) FROM table Supported Not supported
next_day(start_date, day_of_week) First date after start_date that falls on the named day SELECT next_day(order_date, 'Mon') FROM table Supported Not supported
date_diff(date1, date2) Subtract two dates SELECT date_diff('2020-01-01', '2020-01-02') Supported Supported
months_between(ts1, ts2 [, roundOff]) Fractional months between two timestamps (rounds to eight digits unless roundOff is false) SELECT months_between('1997-02-28 10:30:00', '1996-10-30') Supported Not supported
timestamp_seconds(seconds) Timestamp from seconds since the UTC epoch (accepts fractional values) SELECT timestamp_seconds(1230219000) Supported Not supported
timestamp_millis(millis) Timestamp from milliseconds since the UTC epoch SELECT timestamp_millis(1230219000123) Supported Not supported
timestamp_micros(micros) Timestamp from microseconds since the UTC epoch SELECT timestamp_micros(1230219000123123) Supported Not supported
unix_seconds(timestamp) Seconds since 1970-01-01 UTC (truncates higher precision) SELECT unix_seconds(current_timestamp()) Supported Not supported
unix_millis(timestamp) Milliseconds since 1970-01-01 UTC (truncates higher precision) SELECT unix_millis(current_timestamp()) Supported Not supported
unix_micros(timestamp) Microseconds since 1970-01-01 UTC SELECT unix_micros(current_timestamp()) Supported Not supported

Mathematical functions

Function Description Example Furnace Ontology
abs(x) Absolute value SELECT abs(-5) Supported Supported
acos(x) Arc cosine SELECT acos(0.5) Supported Not supported
asin(x) Arc sine SELECT asin(0.5) Supported Not supported
atan(x) Arc tangent SELECT atan(1) Supported Not supported
atan2(y, x) Arc tangent of y/x SELECT atan2(1, 1) Supported Not supported
cbrt(x) Cube root SELECT cbrt(27) Supported Not supported
ceil(x) Round up to nearest integer SELECT ceil(3.7) Supported Supported
ceiling(x) Round up to nearest integer SELECT ceiling(3.7) Supported Supported
cos(x) Cosine SELECT cos(pi()) Supported Not supported
cosh(x) Hyperbolic cosine SELECT cosh(0) Supported Not supported
degrees(x) Convert radians to degrees SELECT degrees(pi()) Supported Not supported
e() Euler's number SELECT e() Supported Not supported
exp(x) e raised to power x SELECT exp(1) Supported Not supported
floor(x) Round down to nearest integer SELECT floor(3.7) Supported Supported
isnan(x) True if x is NaN (NULL input returns false) SELECT isnan(price) FROM table Supported Not supported
nanvl(x, y) Returns x unless it is NaN, in which case returns y SELECT nanvl(price, 0) FROM table Supported Not supported
ln(x) Natural logarithm SELECT ln(e()) Supported Not supported
log(base, x) Logarithm with specified base SELECT log(2, 8) Supported Not supported
log10(x) Base 10 logarithm SELECT log10(100) Supported Not supported
log2(x) Base 2 logarithm SELECT log2(8) Supported Not supported
mod(n, m) Modulus (remainder) SELECT mod(10, 3) Supported Supported
pi() Pi constant SELECT pi() Supported Not supported
pow(x, p) x raised to power p SELECT pow(2, 3) Supported Supported
power(x, p) x raised to power p SELECT power(2, 3) Supported Supported
radians(x) Convert degrees to radians SELECT radians(180) Supported Not supported
rand([seed]) Random value between 0 and 1. Optional seed is honored on Spark but ignored on Trino. SELECT rand() Supported Not supported
random([seed]) Random value between 0 and 1. Optional seed is honored on Spark but ignored on Trino. SELECT random() Supported Not supported
round(x, d) Round to d decimal places SELECT round(3.7, 1) Supported Supported
sign(x) Sign function (-1, 0, 1) SELECT sign(-5) Supported Not supported
sin(x) Sine SELECT sin(pi() / 2) Supported Not supported
sinh(x) Hyperbolic sine SELECT sinh(0) Supported Not supported
sqrt(x) Square root SELECT sqrt(16) Supported Not supported
tan(x) Tangent SELECT tan(pi() / 4) Supported Not supported
tanh(x) Hyperbolic tangent SELECT tanh(0) Supported Not supported
bin(x) Binary (base-2) string representation of a long value (negatives use two's-complement 64-bit) SELECT bin(13) Supported Not supported

String functions

Function Description Example Furnace Ontology
chr(n) Returns character from Unicode code point SELECT chr(65) Supported Not supported
char(n) Returns character from code point (alias for chr) SELECT char(65) Supported Not supported
contains(str, substr) True if string contains substring SELECT contains('hello', 'ell') Supported Not supported
concat_ws(sep, str1, ...) Concatenate strings with separator SELECT concat_ws(',', 'a', 'b') Supported Not supported
concat(str1, ...) Concatenate strings SELECT concat('a', 'b') Supported Supported
length(str) Length of string in characters SELECT length('hello') Supported Supported
len(str) Length of string (alias for length); accepts string or binary SELECT len('hello') Supported Not supported
char_length(str) Length of string (alias for length) SELECT char_length('hello') Supported Supported
character_length(str) Length of string (alias for length) SELECT character_length('hello') Supported Supported
bit_length(expr) Bit length of string or binary data (UTF-8 byte length multiplied by 8 for strings) SELECT bit_length('Spark SQL') Supported Not supported
octet_length(expr) Byte length of string (UTF-8) or binary SELECT octet_length('Spark SQL') Supported Not supported
format_number(num, places_or_format) Format a number like '#,###.##' rounded to N places, or with a user-supplied decimal-format pattern SELECT format_number(12332.123456, 4) Supported Not supported
lower(str) Convert to lowercase SELECT lower('HELLO') Supported Supported
lcase(str) Convert to lowercase (alias for lower) SELECT lcase('HELLO') Supported Not supported
upper(str) Convert to uppercase SELECT upper('hello') Supported Supported
ucase(str) Convert to uppercase (alias for upper) SELECT ucase('hello') Supported Not supported
initcap(str) Capitalize the first letter of each word SELECT initcap('hello world') Supported Not supported
lpad(str, len, pad) Left pad string to length SELECT lpad('hi', 5, 'x') Supported Not supported
rpad(str, len, pad) Right pad string to length SELECT rpad('hi', 5, 'x') Supported Not supported
ltrim(str) Remove leading whitespace SELECT ltrim(' hello') Supported Not supported
rtrim(str) Remove trailing whitespace SELECT rtrim('hello ') Supported Not supported
trim(str) Remove leading and trailing whitespace SELECT trim(' hello ') Supported Supported
replace(str, search, replace) Replace all occurrences SELECT replace('hello', 'l', 'x') Supported Supported
reverse(str) Reverse a string (also works on arrays) SELECT reverse('hello') Supported Not supported
right(str, len) Rightmost len characters of string SELECT right('hello', 3) Supported Not supported
substr(str, pos, len) Extract substring SELECT substr('hello', 2, 3) Supported Supported
substring(str, pos, len) Extract substring SELECT substring('hello', 2, 3) Supported Supported
substring_index(str, delim, count) Substring before count occurrences of the delimiter (negative counts from the end) SELECT substring_index('a.b.c.d', '.', 2) Supported Not supported
split(str, delimiter) Split string into array SELECT split('a,b,c', ',') Supported Not supported
split_part(str, delimiter, index) Get part from split string (1-based) SELECT split_part('a,b,c', ',', 2) Supported Not supported
instr(str, substr) Find substring position (1-based, 0 if not found) SELECT instr('hello', 'll') Supported Not supported
locate(substr, str [, pos]) Find substring position starting at pos (1-based, 0 if not found) SELECT locate('ll', 'hello') Supported Not supported
position(substr IN str) Find substring position (SQL standard syntax) SELECT position('ll' IN 'hello') Supported Not supported
startswith(str, prefix) Check if string starts with prefix SELECT startswith('hello', 'he') Supported Not supported
luhn_check(str) Validate string using Luhn algorithm SELECT luhn_check('79927398713') Supported Not supported

Conditional functions

Function Description Example Furnace Ontology
coalesce(expr1, expr2, ...) Returns first non-null expression SELECT coalesce(col, 'default') Supported Supported
nullif(expr1, expr2) Returns null if expr1 equals expr2, otherwise returns expr1 SELECT 100.0 / nullif(count, 0) Supported Not supported
if(cond, then, else) Returns then if cond is true, otherwise else SELECT if(price > 100, 'high', 'low') Supported Not supported
nvl(expr1, expr2) Returns expr2 if expr1 is null, otherwise expr1 SELECT nvl(col, 'default') Supported Not supported
ifnull(expr1, expr2) Alias for nvl SELECT ifnull(col, 'default') Supported Not supported
nvl2(expr1, expr2, expr3) Returns expr2 if expr1 is not null, otherwise expr3 SELECT nvl2(col, 'present', 'missing') Supported Not supported
greatest(expr1, expr2, ...) Largest non-null value (null only when all inputs are null) SELECT greatest(a, b, c) FROM table Supported Not supported
least(expr1, expr2, ...) Smallest non-null value (null only when all inputs are null) SELECT least(a, b, c) FROM table Supported Not supported
regexp_like(str, pattern) Test if string matches regex pattern SELECT regexp_like('hello123', '[0-9]+') Supported Not supported
regexp_extract(str, pattern) Extract first substring matching pattern SELECT regexp_extract('hello123', '[0-9]+') Supported Supported
regexp_extract_all(str, pattern) Extract all substrings matching pattern SELECT regexp_extract_all('a1b2c3', '[0-9]+') Supported Not supported
regexp_replace(str, pattern, replacement) Replace all matches with replacement string SELECT regexp_replace('hello123', '[0-9]+', 'X') Supported Supported
regexp_count(str, pattern) Count occurrences of pattern in string SELECT regexp_count('a1b2c3', '[0-9]+') Supported Not supported
rlike(str, regexp) True if string matches the Java regex (partial match) SELECT rlike('hello123', '\\d+') Supported Not supported
regexp(str, regexp) Alias for rlike SELECT regexp('hello123', '\\d+') Supported Not supported

Hashing functions

Function Description Example Furnace Ontology
md5(expr) MD5 128-bit checksum as a lowercase hex string SELECT md5('Spark') Supported Not supported
sha(expr) SHA-1 hash as a hex string (alias for sha1) SELECT sha('Spark') Supported Not supported
sha1(expr) SHA-1 hash as a hex string SELECT sha1('Spark') Supported Not supported
sha2(expr, bitLength) SHA-2 hash (224, 256, 384, or 512; 0 means 256). Invalid bit length returns null SELECT sha2('Spark', 256) Supported Not supported
crc32(expr) CRC32 checksum as a bigint SELECT crc32('Spark') Supported Not supported

Conversion functions

Function Description Example Furnace Ontology
cast(expr AS type) Convert value to specified type cast(count() AS float) Supported Supported
try_cast(expr AS type) Convert value to specified type, returns NULL on failure try_cast('abc' AS int) Supported Not supported

Aggregate functions

Function Description Example Furnace Ontology
count() Count all rows SELECT count() FROM table Supported Supported
count(DISTINCT col) Count distinct values in column SELECT count(DISTINCT id) FROM table Supported Supported if single property
approx_count_distinct(col) Approximate count of distinct values (HyperLogLog) SELECT approx_count_distinct(id) FROM table Supported Not supported
sum(col) Sum values in column SELECT sum(cost) FROM table Supported Supported
avg(col) Mean average values in column SELECT avg(weight) FROM table Supported Supported
mean(col) Mean average values in column (alias for avg) SELECT mean(weight) FROM table Supported Not supported
min(col) Minimum value in column SELECT min(price) FROM table Supported Supported
max(col) Maximum value in column SELECT max(price) FROM table Supported Supported
min_by(x, y) Value of x associated with minimum value of y SELECT min_by(name, age) FROM table Supported Not supported
max_by(x, y) Value of x associated with maximum value of y SELECT max_by(name, age) FROM table Supported Not supported
first(col[, ignoreNulls]) First value in a group (always nullable) SELECT first(name) FROM table GROUP BY category Supported Not supported
last(col[, ignoreNulls]) Last value in a group (always nullable) SELECT last(name) FROM table GROUP BY category Supported Not supported
count_if(condition) Count rows where condition is true SELECT count_if(age > 18) FROM table Supported Not supported
bool_and(condition) True if all values are true SELECT bool_and(active) FROM table Supported Not supported
bool_or(condition) True if any value is true SELECT bool_or(active) FROM table Supported Not supported
bit_and(col) Bitwise AND of all non-null integer values SELECT bit_and(flags) FROM table Supported Not supported
bit_or(col) Bitwise OR of all non-null integer values SELECT bit_or(flags) FROM table Supported Not supported
bit_xor(col) Bitwise XOR of all non-null integer values SELECT bit_xor(flags) FROM table Supported Not supported
every(condition) True if all values are true (alias for bool_and) SELECT every(active) FROM table Supported Not supported
stddev(col) Sample standard deviation SELECT stddev(salary) FROM table Supported Supported
stddev_pop(col) Population standard deviation SELECT stddev_pop(salary) FROM table Supported Supported
stddev_samp(col) Sample standard deviation (alias for stddev) SELECT stddev_samp(salary) FROM table Supported Supported
variance(col) Sample variance SELECT variance(salary) FROM table Supported Not supported
var_pop(col) Population variance SELECT var_pop(salary) FROM table Supported Not supported
var_samp(col) Sample variance SELECT var_samp(salary) FROM table Supported Not supported
corr(y, x) Pearson correlation coefficient SELECT corr(sales, ads) FROM table Supported Not supported
covar_pop(y, x) Population covariance SELECT covar_pop(y, x) FROM table Supported Not supported
covar_samp(y, x) Sample covariance SELECT covar_samp(y, x) FROM table Supported Not supported
skewness(col) Skewness SELECT skewness(values) FROM table Supported Not supported
regr_intercept(y, x) Linear regression intercept SELECT regr_intercept(y, x) FROM table Supported Not supported
regr_slope(y, x) Linear regression slope SELECT regr_slope(y, x) FROM table Supported Not supported
grouping(col) Returns 1 if column is aggregated, 0 otherwise (for use with ROLLUP/CUBE/GROUPING SETS) SELECT grouping(category) FROM table GROUP BY ROLLUP(category) Supported Not supported
collect_set(col) Returns an array of unique values from column SELECT collect_set(name) FROM table GROUP BY category Supported Not supported
collect_list(col) Returns an array of all values from column (preserves duplicates) SELECT collect_list(name) FROM table GROUP BY category Supported Not supported
percentile(col, p[, freq]) Exact percentile of column for percentage p (scalar or array) SELECT percentile(salary, 0.5) FROM table Supported Not supported
percentile_approx(col, p[, accuracy]) Approximate percentile; preserves input type (numeric, date, timestamp, interval) SELECT percentile_approx(salary, ARRAY(0.25, 0.5, 0.75)) FROM table Supported Not supported
approx_percentile(col, p[, accuracy]) Alias for percentile_approx SELECT approx_percentile(salary, 0.5) FROM table Supported Not supported

Window functions

Function Description Example Furnace Ontology
ROW_NUMBER() Assigns sequential row number within partition ROW_NUMBER() OVER (ORDER BY col) Supported Supported
RANK() Rank with gaps for ties RANK() OVER (ORDER BY salary DESC) Supported Supported
DENSE_RANK() Rank without gaps for ties DENSE_RANK() OVER (ORDER BY salary DESC) Supported Supported
PERCENT_RANK() Percentage ranking of value in group PERCENT_RANK() OVER (ORDER BY salary) Supported Not supported
CUME_DIST() Cumulative distribution of value CUME_DIST() OVER (ORDER BY salary) Supported Not supported
NTILE(n) Divide rows into n buckets NTILE(4) OVER (ORDER BY salary) Supported Not supported
LAG(expr[, offset[, default]]) Value from previous row LAG(salary, 1) OVER (ORDER BY date) Supported Supported
LEAD(expr[, offset[, default]]) Value from next row LEAD(salary, 1) OVER (ORDER BY date) Supported Supported
FIRST_VALUE(expr) First value in window frame FIRST_VALUE(salary) OVER (ORDER BY date) Supported Supported
LAST_VALUE(expr) Last value in window frame LAST_VALUE(salary) OVER (ORDER BY date) Supported Supported
NTH_VALUE(expr, n) Nth value in window frame NTH_VALUE(salary, 2) OVER (ORDER BY date) Supported Supported
Aggregate functions Standard aggregates (count, sum, avg, min, max, stddev, variance, etc.) can be used as window functions SUM(amount) OVER (PARTITION BY customer_id) Supported Only count, sum, min max, avg

Window functions require an OVER clause with optional PARTITION BY and ORDER BY:

-- examples
SELECT id, name, ROW_NUMBER() OVER (ORDER BY name) as row_num
FROM `/path/to/table`;

SELECT category, name, price,
       ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as rank_in_category
FROM `/path/to/products`;

SELECT order_id, customer_id, order_date, total_amount,
       SUM(total_amount) OVER (PARTITION BY customer_id ORDER BY order_date) as running_total,
        AVG(total_amount) OVER (PARTITION BY customer_id) as customer_avg,
        COUNT(*) OVER (PARTITION BY customer_id) as customer_order_count
FROM `/path/to/orders`;

SELECT id, amount,
       ROW_NUMBER() OVER w as sequence_num
FROM `/path/to/transactions`
WINDOW w AS (PARTITION BY customer_id ORDER BY transaction_date);

JSON functions

Function Description Example Furnace Ontology
get_json_object(json_str, path) Extract a value from a JSON string using a JSONPath expression SELECT get_json_object('{"a":1}', '$.a') Supported Not supported
json_array_length(json_str) Returns the number of elements in a JSON array SELECT json_array_length('[1,2,3]') Supported Not supported
json_object_keys(json_str) Returns the keys of a JSON object as an array SELECT json_object_keys('{"a":1,"b":2}') Supported Not supported
to_json(expr) Convert a struct or map to a JSON string SELECT to_json(map('a', 1)) Supported Not supported

Misc functions

Function Description Example Furnace Ontology
zorder() Iceberg Z-order optimization (Iceberg only) CALL zorder('table', 'col1,col2') Supported Not supported
typeof(expr) Returns the runtime type name of the expression SELECT typeof(price) FROM table Supported Not supported

Geospatial functions

Function Description Example Furnace Ontology
st_point(x, y) Creates a point geometry from X, Y coordinates SELECT st_point(-122.4, 37.8) Supported Not supported
st_contains(geom1, geom2) Returns true if geom1 contains geom2 WHERE st_contains(boundary, location) Supported Not supported
st_equals(geom1, geom2) Returns true if geometries are spatially equal SELECT st_equals(point1, point2) Supported Not supported
st_geomfromgeojson(json) Creates geometry from GeoJSON string SELECT st_geomfromgeojson('{"type":"Point","coordinates":[0,0]}') Supported Not supported
st_astext(geom) Returns geometry as Well-Known Text (WKT) string SELECT st_astext(st_point(0, 0)) Supported Not supported
st_h3cellids(geom, level, fullCover) Returns an array of H3 cells that cover the geometry SELECT st_h3cellids(st_point(0, 0), 8, true) Supported Not supported

:::callout{theme="neutral"} Geometry columns cannot be returned directly. Use st_astext() to convert to WKT format. :::

Arithmetic operators

Operator Description Example Furnace Ontology
+ Plus SELECT count() + 5 Supported Supported
- Minus SELECT count() - 1 Supported Supported
* Times SELECT sum(weight * 2) Supported Supported
/ Divide SELECT (avg(a) + avg(b)) / 2 Supported Supported

Boolean operators

Operator Description Example Furnace Ontology
= Equal to WHERE id = 1 Supported Supported
<>, != Not equal to WHERE status <> 'inactive' Supported Supported
< Less than WHERE age < 18 Supported Supported
<= Less than or equal to WHERE price <= 100.0 Supported Supported
> Greater than WHERE salary > 50000 Supported Supported
>= Greater than or equal to WHERE score >= 90 Supported Supported
AND Logical AND WHERE active = true AND verified = true Supported Supported
OR Logical OR WHERE status = 'new' OR status = 'pending' Supported Supported
NOT Logical NOT WHERE NOT deleted Supported Supported
IN Value in list WHERE category IN ('A', 'B', 'C') Supported Supported
LIKE Pattern matching WHERE name LIKE 'John%' Supported Supported
EXISTS True if subquery returns rows WHERE EXISTS (SELECT 1 FROM orders WHERE orders.customer_id = customers.id) Supported Supported

Other expressions

Expression Description Example Furnace Ontology
CASE ... WHEN Switch expression SELECT CASE WHEN mark >= 70 THEN 'A' WHEN mark >= 60 THEN 'B' ELSE 'C' END Supported Supported
INTERVAL Time interval literal SELECT order_date + INTERVAL 30 DAY, WHERE created_at > current_timestamp() - INTERVAL 1 HOUR Supported Not supported

:::callout{theme="neutral"} The IF(condition, trueValue, falseValue) function is not supported in Ontology SQL. Use CASE WHEN condition THEN trueValue ELSE falseValue END instead. :::

Case-when statements

CASE WHEN condition THEN result
     [WHEN condition THEN result]
     [ELSE result]
END

Date/time intervals

INTERVAL { yearMonthIntervalQualifier | dayTimeIntervalQualifier }

yearMonthIntervalQualifier
 { YEAR [TO MONTH] |
   MONTH }

dayTimeIntervalQualifier
 { DAY [TO { HOUR | MINUTE | SECOND } ] |
   HOUR [TO { MINUTE | SECOND } ] |
   MINUTE [TO SECOND] |
   SECOND }
-- examples
INTERVAL '1' YEAR
INTERVAL '6' MONTH
INTERVAL '25' DAY
INTERVAL '6' YEAR TO MONTH

Data types

Type Spark equivalent Furnace Ontology
BOOLEAN BOOLEAN Supported Supported
SHORT SHORT Supported Supported
INT INT Supported Supported
LONG LONG Supported Supported
FLOAT FLOAT Supported Supported
DOUBLE DOUBLE Supported Supported
DATE DATE Supported Supported
TIMESTAMP TIMESTAMP Supported Supported
TIMESTAMP_NTZ TIMESTAMP_NTZ Supported Supported
STRING STRING Supported Supported
BINARY BINARY Supported Not supported
DECIMAL DECIMAL Supported Supported
MAP<key, value> MAP<key, value> Supported Not supported
ARRAY<element_type> ARRAY<element_type> Supported Supported
STRUCT<field1_name: field1_type, field2_name: field2_type, …> STRUCT<field1_name: field1_type, field2_name: field2_type, …> Supported Not supported
CREATE TABLE `/path/to/table` (
  col1 BOOLEAN,
  col2 SHORT,
  col3 INT,
  col4 LONG,
  col5 FLOAT,
  col6 DOUBLE,
  col7 DATE,
  col8 TIMESTAMP,
  col9 TIMESTAMP_NTZ,
  col10 STRING,
  col11 BINARY,
  col12 DECIMAL(10, 2),
  col13 MAP<STRING, STRING>,
  col14 ARRAY<STRING>,
  col15 STRUCT<col1: STRING, col2: STRING>
);

中文翻译

SQL 方言

Foundry SQL 通常是 Spark SQL ↗ 的一个子集,并遵循 ANSI 标准。

快速参考

常用操作

-- 创建表
CREATE TABLE `/path/to/table` (id INT, name STRING);

-- 插入数据
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');

-- 基本查询
SELECT * FROM `/path/to/table` WHERE id = 1;

-- 表连接
SELECT a.*, b.name
FROM `/path/to/table1` a
LEFT JOIN `/path/to/table2` b ON a.id = b.id;

-- 聚合
SELECT category, COUNT(*), AVG(price)
FROM `/path/to/products`
GROUP BY category
HAVING COUNT(*) > 10;

-- 窗口函数
SELECT name, salary,
       RANK() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;

-- CTE(公用表表达式)
WITH summary AS (
  SELECT category, SUM(amount) as total
  FROM `/path/to/sales`
  GROUP BY category
)
SELECT * FROM summary WHERE total > 1000;

常用函数

类别 函数
聚合 count(), sum(), avg(), min(), max(), count(DISTINCT col), collect_set()
字符串 upper(), lower(), trim(), concat(), concat_ws(), contains(), split(), substr()
日期/时间 current_date(), current_timestamp(), date_add(), date_diff(), to_date(), year(), month(), day()
数学 abs(), round(), ceil(), floor(), pow(), sqrt(), mod()
数组 array(), array_contains(), array_size(), array_distinct(), explode(), flatten()
条件 CASE WHEN ... THEN ... END, coalesce(), nullif(), regexp_like(), regexp_extract()
窗口 ROW_NUMBER(), RANK(), DENSE_RANK(), LAG(), LEAD()

表引用

-- 按路径引用
SELECT * FROM `/path/to/table`;

-- 按 RID 引用
SELECT * FROM `ri.rid.for.table.1234`;

-- 指定分支
SELECT * FROM `/path/to/table`.branch_master;

关键语法说明

  • 子查询必须用括号包裹。
  • 默认创建的表为 Iceberg 格式,但可以使用 USING <format> 语法覆盖。
  • 只有 Iceberg 表支持 INSERTUPDATEDELETE 操作。

创建表

CREATE [ OR REPLACE ] TABLE table_identifier [ ( col_name1 col_type1, ... ) ] [ USING table_format ] [ PARTITIONED BY ( expr [, ...] ) ] [ TBLPROPERTIES ( 'key'='value' [, ...] ) ] [ AS select_statement ]

PARTITIONED BY 仅支持 Iceberg 表。TBLPROPERTIES 用于设置 Iceberg 表属性 ↗

如果未指定,表格式默认为 Iceberg。有效的表格式包括:

  • Iceberg: iceberg
  • Parquet: parquet
  • Avro: avro
-- 示例
CREATE TABLE `/path/to/table` AS
SELECT *
FROM `/path/to/table`;

CREATE TABLE `/path/to/table` USING parquet AS
SELECT *
FROM `/path/to/table`;

CREATE TABLE `/path/to/table`
(
    id   INT,
    name STRING
);

CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);

CREATE TABLE `/path/to/table` (id INT, ts TIMESTAMP)
    PARTITIONED BY (days(ts));

CREATE TABLE `/path/to/table` (id INT)
    TBLPROPERTIES ('write.format.default'='parquet');

CREATE TABLE `/path/to/table`
    PARTITIONED BY (id)
    AS SELECT * FROM `/path/to/source`;

修改表

Iceberg 表支持使用 INSERTUPDATEDELETE 语句进行数据追加。非 Iceberg 表目前不支持修改操作。

插入

INSERT [ INTO ] table_identifier { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query }
-- 示例
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');

INSERT INTO `/path/to/table` SELECT id, name FROM `/path/to/table2`;

更新

UPDATE table_identifier
SET column_name = value [, ...]
[ WHERE condition ]
-- 示例
UPDATE `/path/to/table`
SET col = 'new value';

UPDATE `/path/to/table`
SET col = 'new value'
WHERE col = 'old value';

删除

DELETE FROM table_identifier WHERE condition
DELETE FROM `/path/to/table` WHERE col = 'to delete'

修改元数据

Iceberg 表支持使用 ALTER TABLE 语法进行模式和元数据的修改。更多详情请参考 Iceberg Spark DDL 文档 ↗

列操作

ALTER TABLE table_identifier ADD COLUMN col_name col_type [COMMENT 'comment']
ALTER TABLE table_identifier ADD COLUMNS (col_name col_type [COMMENT 'comment'], ...)
ALTER TABLE table_identifier DROP COLUMN col_name
ALTER TABLE table_identifier DROP COLUMNS (col_name, ...)
ALTER TABLE table_identifier RENAME COLUMN old_name TO new_name
ALTER TABLE table_identifier ALTER COLUMN col_name TYPE new_type
ALTER TABLE table_identifier ALTER COLUMN col_name COMMENT 'comment'
ALTER TABLE table_identifier ALTER COLUMN col_name SET NOT NULL
ALTER TABLE table_identifier ALTER COLUMN col_name DROP NOT NULL
-- 示例
ALTER TABLE `/path/to/table` ADD COLUMN new_col STRING;
ALTER TABLE `/path/to/table` ADD COLUMNS (col1 INT, col2 STRING COMMENT 'description');
ALTER TABLE `/path/to/table` DROP COLUMN old_col;
ALTER TABLE `/path/to/table` RENAME COLUMN old_name TO new_name;
ALTER TABLE `/path/to/table` ALTER COLUMN col2 TYPE BIGINT;

分区字段

ALTER TABLE table_identifier ADD PARTITION FIELD expr [AS alias]
ALTER TABLE table_identifier DROP PARTITION FIELD expr
ALTER TABLE table_identifier REPLACE PARTITION FIELD old_expr WITH new_expr [AS alias]
-- 示例
ALTER TABLE `/path/to/table` ADD PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` ADD PARTITION FIELD bucket(16, id) AS id_bucket;
ALTER TABLE `/path/to/table` DROP PARTITION FIELD days(ts);
ALTER TABLE `/path/to/table` REPLACE PARTITION FIELD days(ts) WITH hours(ts);

写入排序

ALTER TABLE table_identifier WRITE ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]
ALTER TABLE table_identifier WRITE DISTRIBUTED BY PARTITION [LOCALLY ORDERED BY col1 [ASC|DESC] [, ...]]
ALTER TABLE table_identifier WRITE UNORDERED
-- 示例
ALTER TABLE `/path/to/table` WRITE ORDERED BY col1, col2 DESC;
ALTER TABLE `/path/to/table` WRITE DISTRIBUTED BY PARTITION LOCALLY ORDERED BY col1;
ALTER TABLE `/path/to/table` WRITE UNORDERED;

标识符字段

ALTER TABLE table_identifier SET IDENTIFIER FIELDS col1 [, ...]
ALTER TABLE table_identifier DROP IDENTIFIER FIELDS col1 [, ...]
-- 示例
ALTER TABLE `/path/to/table` SET IDENTIFIER FIELDS col1, col2;
ALTER TABLE `/path/to/table` DROP IDENTIFIER FIELDS col1, col2;

表属性

ALTER TABLE table_identifier SET TBLPROPERTIES ('key1'='value1' [, ...])
ALTER TABLE table_identifier UNSET TBLPROPERTIES ('key1' [, ...])
-- 示例
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet');
ALTER TABLE `/path/to/table` SET TBLPROPERTIES ('write.format.default'='parquet', 'write.parquet.compression-codec'='zstd');
ALTER TABLE `/path/to/table` UNSET TBLPROPERTIES ('write.format.default');

标签

ALTER TABLE table_identifier CREATE TAG [IF NOT EXISTS] tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier CREATE OR REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
ALTER TABLE table_identifier DROP TAG [IF EXISTS] tag_name
ALTER TABLE table_identifier REPLACE TAG tag_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- 示例
ALTER TABLE `/path/to/table` CREATE TAG v1 AS OF VERSION 42;
ALTER TABLE `/path/to/table` CREATE TAG IF NOT EXISTS v1 AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE TAG v2 AS OF VERSION 100;
ALTER TABLE `/path/to/table` DROP TAG IF EXISTS v1;
ALTER TABLE `/path/to/table` REPLACE TAG v1 AS OF VERSION 50 RETAIN 14 DAYS;

分支

ALTER TABLE table_identifier CREATE BRANCH [IF NOT EXISTS] branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier CREATE OR REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS] [WITH SNAPSHOT RETENTION n SNAPSHOTS [n DAYS]]
ALTER TABLE table_identifier DROP BRANCH [IF EXISTS] branch_name
ALTER TABLE table_identifier REPLACE BRANCH branch_name [AS OF VERSION snapshot_id] [RETAIN n DAYS]
-- 示例
ALTER TABLE `/path/to/table` CREATE BRANCH feature_branch;
ALTER TABLE `/path/to/table` CREATE BRANCH IF NOT EXISTS my_branch AS OF VERSION 42 RETAIN 7 DAYS;
ALTER TABLE `/path/to/table` CREATE OR REPLACE BRANCH my_branch AS OF VERSION 10;
ALTER TABLE `/path/to/table` DROP BRANCH IF EXISTS old_branch;
ALTER TABLE `/path/to/table` REPLACE BRANCH my_branch AS OF VERSION 5 RETAIN 30 DAYS;

事务块

事务块将多个语句组合在一起,作为一个原子作业执行。

BEGIN;
statement1;
statement2;
...
COMMIT;

规则:

  • BEGIN/COMMIT 内的所有语句在同一个作业中一起执行。
  • 事务块不能嵌套。
  • 事务块必须包含至少一个写入语句。
  • BEGIN 没有 COMMITCOMMIT 没有 BEGIN 将导致解析错误。
-- 原子性地创建并填充表
BEGIN;
CREATE OR REPLACE TABLE `/path/to/table` (id INT, name STRING);
INSERT INTO `/path/to/table` VALUES (1, 'foo'), (2, 'bar');
COMMIT;

-- 作为单个作业向多个表插入数据
BEGIN;
INSERT INTO `/path/to/table1` SELECT * FROM `/path/to/source`;
INSERT INTO `/path/to/table2` SELECT * FROM `/path/to/source`;
COMMIT;

查询组合

[ WITH with_query [ , ... ] ]
select_statement
  [ { UNION | INTERSECT | EXCEPT } select_statement, ... ]
  [ ORDER BY { expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] } ]
  [ WINDOW { window_name AS ( window_spec ) [, ...] } ]
  [ LIMIT { ALL | expression } ]

WITH 子句(公用表表达式)

其中 with_query 为:

cte_name [ ( column_name [, ...] ) ] AS ( select_statement )
-- 示例
WITH sales_summary (cust_id, total_sales, order_count) AS (
  SELECT customer_id, SUM(total_amount), COUNT(*)
  FROM `/data/orders`
  GROUP BY customer_id
)
SELECT * FROM sales_summary WHERE total_sales > 10000;

SELECT 语句

其中 select_statement 为:

SELECT [ ALL | DISTINCT ] { [ named_expression | regex_column_names | * ] [, ...] }
  FROM { from_item [, ...] }
  [ WHERE boolean_expression ]
  [ GROUP BY { expression | ROLLUP(...) | CUBE(...) | GROUPING SETS(...) } [ , ... ] ]
  [ HAVING boolean_expression ]
  [ LIMIT { ALL | expression } [ OFFSET expression ] ]

命名表达式

其中 named_expression 为以下之一:

带可选别名的表达式:

expression [ AS alias ]
-- 示例
SELECT count(*) AS total_count FROM `/path/to/table`;

SELECT name, salary * 1.1 AS adjusted_salary FROM `/path/to/employees`;

窗口函数:

window_function() OVER ( window_spec )
window_function() OVER window_name

其中 window_spec 为:

[ PARTITION BY expression [, ...] ]
[ ORDER BY expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...] ]
-- 示例
SELECT name, ROW_NUMBER() OVER (ORDER BY salary DESC) as rank
FROM `/path/to/employees`;

SELECT category, product_id,
       ROW_NUMBER() OVER (PARTITION BY category ORDER BY price DESC) as category_rank
FROM `/path/to/products`;

-- 使用命名窗口
SELECT customer_id, order_date, total,
       ROW_NUMBER() OVER w as order_sequence
FROM `/path/to/orders`
WINDOW w AS (PARTITION BY customer_id ORDER BY order_date);

FROM 子句

其中 from_item 为以下之一:

表引用

表可以通过文件系统路径或资源标识符(RID)进行引用。两者都支持可选的分支规范,用于查询特定的 Iceberg 分支。包含特殊字符的分支名称需要使用反引号进行转义。

{ `/path/to/table`[.branch_<branch name>] | `ri.rid.for.table.1234`[.branch_<branch name>] }
-- 示例
SELECT * FROM `/path/to/table`;

SELECT * FROM `/path/to/table`.branch_master;

SELECT * FROM `/path/to/table` WHERE id = 1;

SELECT * FROM `ri.rid.for.table.1234`;

SELECT * FROM `ri.rid.for.table`.`branch_abc\xyz`;

可以使用 VERSION AS OF 语法进行时间旅行查询,对于 Iceberg 表使用快照 ID,对于 Foundry 数据集使用事务 RID。

-- 示例
SELECT * FROM `/path/to/table` VERSION AS OF `ri.foundry.main.transaction.1234`;

SELECT * FROM `/path/to/table` VERSION AS OF 1234567;

对于 Foundry 数据集,可以使用 VERSION BETWEEN ... AND ... 读取在某个事务范围内添加的行。该查询返回严格在起始事务之后、截至结束事务(含)添加的行。起始边界是排他的,结束边界是包含的。两个边界都必须是事务 RID,而不是快照 ID。

-- 在 txn.1234 之后追加的行,截至并包括 txn.5678
SELECT * FROM `/path/to/table`
    VERSION BETWEEN `ri.foundry.main.transaction.1234` AND `ri.foundry.main.transaction.5678`;

VERSION BETWEEN 不支持 Iceberg 表。其他时间旅行语法目前也不支持。

连接

relation
  { CROSS | LEFT [ OUTER ] | RIGHT [ OUTER ] | FULL [ OUTER ] }
  JOIN relation
  ON boolean_expression
-- 示例
SELECT * FROM `/path/to/table` LEFT JOIN `/path/to/table2` ON id = id2;

SELECT * FROM `/path/to/table` RIGHT OUTER JOIN `/path/to/table2` ON id = id2;

子查询

( select_statement )
-- 示例
SELECT * FROM (SELECT * FROM `/path/to/table`);

SELECT * FROM (SELECT * FROM `/path/to/table`) WHERE id = 1;

SELECT * FROM (
  VALUES (9001, 1001, 1, DATE '2023-06-01', 'credit_card', 1329.98, 'completed',
          TIMESTAMP '2023-06-01 11:00:00')
) AS orders(order_id, customer_id, product_id, order_date, payment_method, total, status, created_at);

:::callout{theme="neutral"} 子查询必须用括号包裹。即使在 Spark SQL 中并非总是必需,但在 Foundry 中强制执行此规则。 :::

完整查询示例

-- 带连接和过滤的复杂查询
SELECT DISTINCT o.order_id,
                c.name,
                o.total_amount,
                p.category
FROM `/data/orders` o
         LEFT JOIN `/data/customers` c ON o.customer_id = c.id
         CROSS JOIN (SELECT product_id, category
                     FROM `/data/products`
                     WHERE category = 'Electronics') p ON o.product_id = p.product_id
WHERE o.order_date > DATE '2024-01-01'
  AND o.total_amount > 1000.0
ORDER BY o.total_amount DESC
LIMIT 100;

-- 带 HAVING 子句的聚合
SELECT c.name,
       COUNT(o.order_id) as order_count,
       SUM(o.total_amount) as total_spent
FROM `/data/customers` c
       LEFT JOIN `/data/orders` o ON c.id = o.customer_id
GROUP BY c.name
HAVING COUNT(o.order_id) > 0;

-- 带多个连接的 UNION 查询
SELECT emp_id, name, dept, 'Active' AS status
FROM `/hr/employees` e
         LEFT OUTER JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = true
UNION
SELECT emp_id, name, dept, 'Inactive' AS status
FROM (SELECT *
      FROM `/hr/employees`
      WHERE termination_date < current_timestamp()) e
         RIGHT JOIN `/hr/departments` d ON e.dept_id = d.id
WHERE e.active = false
ORDER BY dept ASC, status DESC NULLS LAST;

SQL 提示

SQL 提示为查询引擎提供额外的指令。提示使用以下语法指定:

/*+ hint1, hint2(opts...) */

目前支持以下提示:

  • adhoc - 将写入查询作为临时查询运行。新查询作为单独的目标添加到表上的任何现有目标旁边,保留现有的默认目标。直接重建表仍将使用原始默认目标,而不是临时目标。
/*+ adhoc */
CREATE OR REPLACE TABLE `/path/to/table` AS SELECT * FROM `/path/to/source`;

Iceberg 引用

Foundry SQL 支持 Iceberg 引用来查询元数据。请查阅 官方文档 ↗

-- 示例
SELECT * FROM `/path/to/table/`.files;

SELECT * FROM `/path/to/table/`.snapshots;

内省

使用 SHOW FUNCTIONS 列出 Foundry SQL 支持的函数。该语句返回三列:namekind(值为 scalaraggregatewindow)和 signature。可选的 LIKE 模式按函数名称过滤,其中 % 匹配任意字符序列,_ 匹配单个字符。

SHOW FUNCTIONS;
SHOW FUNCTIONS LIKE 'abs%';

函数

函数可用于以更复杂的方式转换数据。有关以下每个函数的详细信息,请参考 Spark SQL 文档 ↗

数组函数

函数 描述 示例 Furnace Ontology
array(expr, ...) 使用给定元素创建数组 SELECT array(1, 2, 3) 支持 支持
array_distinct(array) 移除数组中的重复值 SELECT array_distinct(tags) FROM table 支持 不支持
array_intersect(array1, array2) 两个数组中都存在的元素(无重复) SELECT array_intersect(arr1, arr2) FROM table 支持 不支持
array_union(array1, array2) 任一数组中存在的元素(无重复) SELECT array_union(arr1, arr2) FROM table 支持 不支持
array_except(array1, array2) 在 array1 中但不在 array2 中的元素 SELECT array_except(arr1, arr2) FROM table 支持 不支持
array_join(array, delimiter [, nullReplacement]) 使用分隔符连接数组元素 SELECT array_join(tags, ',') FROM table 支持 不支持
array_max(array) 数组中的最大值 SELECT array_max(ARRAY(1, 5, 3)) 支持 不支持
array_min(array) 数组中的最小值 SELECT array_min(ARRAY(1, 5, 3)) 支持 不支持
array_position(array, element) 第一次出现的位置(从1开始,未找到返回0) SELECT array_position(tags, 'item') FROM table 支持 不支持
array_remove(array, element) 移除所有匹配的元素 SELECT array_remove(tags, 'old') FROM table 支持 不支持
array_size(array) 数组中的元素数量 SELECT array_size(tags) FROM table 支持 不支持
array_contains(array, value) 如果数组包含该值则返回 true SELECT array_contains(tags, 'new') FROM table 支持 不支持
array_repeat(element, count) 创建元素重复 count 次的数组 SELECT array_repeat('x', 5) 支持 不支持
arrays_overlap(array1, array2) 如果数组有共同的非空元素则返回 true SELECT arrays_overlap(arr1, arr2) FROM table 支持 不支持
flatten(arrayOfArrays) 将嵌套数组展平为单个数组 SELECT flatten(nested_arrays) FROM table 支持 不支持
sequence(start, stop, step) 生成整数序列 SELECT sequence(1, 10, 2) 支持 不支持
shuffle(array) 数组的随机排列 SELECT shuffle(tags) FROM table 支持 不支持
slice(array, start, length) 从 start 开始长度为 length 的数组子集 SELECT slice(tags, 1, 2) FROM table 支持 不支持
sort_array(array[, ascendingOrder]) 按升序(默认)或降序排序数组 SELECT sort_array(tags) FROM table 支持 不支持
array_sort(array) 按升序排序数组,null 值排在最后 SELECT array_sort(tags) FROM table 支持 不支持
get(array, index) 返回给定索引处的元素(从0开始) SELECT get(tags, 0) FROM table 支持 不支持
element_at(array, index) 返回给定索引处的元素(从1开始;负数从末尾计数) SELECT element_at(tags, 1) FROM table 支持 不支持
size(array) 数组中的元素数量(也接受 map) SELECT size(tags) FROM table 支持 不支持
cardinality(array) 数组中的元素数量(也接受 map) SELECT cardinality(tags) FROM table 支持 不支持
explode(array) 为数组中的每个元素生成新行 SELECT explode(tags) FROM table 支持 不支持

Map 函数

函数 描述 示例 Furnace Ontology
map(key1, value1, key2, value2, ...) 从键值对创建 map SELECT map('a', 1, 'b', 2) 支持 不支持
map_keys(map) 返回 map 的键数组 SELECT map_keys(attributes) FROM table 支持 不支持
map_values(map) 返回 map 的值数组 SELECT map_values(attributes) FROM table 支持 不支持
element_at(map, key) 返回给定键的值 SELECT element_at(attributes, 'color') FROM table 支持 不支持
size(map) map 中的条目数(也接受数组) SELECT size(attributes) FROM table 支持 不支持
cardinality(map) map 中的条目数(也接受数组) SELECT cardinality(attributes) FROM table 支持 不支持

Struct 函数

函数 描述 示例 Furnace Ontology
struct(expr1, expr2, ...) 从表达式创建 struct SELECT struct('red', 'large', 2.0) 支持 不支持

日期和时间戳函数

函数 描述 示例 Furnace Ontology
current_timestamp() 返回当前时间戳 SELECT current_timestamp() 支持 支持
current_date() 返回当前日期 SELECT current_date() 支持 支持
now() 返回当前时间戳(current_timestamp 的别名) SELECT now() 支持 不支持
add_months(date, num_months) 给日期添加月数 add_months(order_date, 3) 支持 不支持
date_add(expr, num_days) 给日期添加天数 date_add(order_date, 7) 支持 支持
date_format(expr, format) 将日期或时间戳格式化为字符串 SELECT date_format(TIMESTAMP '2024-01-15 10:30:00', 'yyyy-MM-dd') 支持 不支持
date_sub(expr, num_days) 从日期减去天数 date_sub(order_date, 7) 支持 支持
from_unixtime(expr [, format]) 将 Unix 时间戳转换为时间戳 from_unixtime(1609459200) 支持 不支持
unix_timestamp([expr] [, format]) 将时间戳转换为 Unix 时间戳 unix_timestamp(current_timestamp()) 支持 不支持
day(date) 从日期中提取月份中的日 SELECT day(order_date) FROM table 支持 支持
hour(timestamp) 从时间戳中提取小时 SELECT hour(order_timestamp) FROM table 支持 不支持
minute(timestamp) 从时间戳中提取分钟 SELECT minute(order_timestamp) FROM table 支持 不支持
month(date) 从日期中提取月份 SELECT month(order_date) FROM table 支持 支持
quarter(date) 从日期中提取季度 SELECT quarter(order_date) FROM table 支持 支持
second(timestamp) 从时间戳中提取秒 SELECT second(order_timestamp) FROM table 支持 不支持
year(date) 从日期中提取年份 SELECT year(order_date) FROM table 支持 支持
date_trunc(unit, timestamp) 将时间戳截断到指定单位 SELECT date_trunc('day', current_timestamp()) 支持 不支持
trunc(date, fmt) 将日期截断到 YEARQUARTERMONTHWEEK 粒度 SELECT trunc(order_date, 'MM') 支持 不支持
to_date(str [, format]) 将字符串转换为日期 SELECT to_date('2024-01-15') 支持 不支持
to_timestamp(expr [, format]) 转换为时间戳 SELECT to_timestamp('2024-01-15 10:30:00') 支持 不支持
timestamp(expr) 转换为时间戳的简写形式 SELECT timestamp('2024-01-15 10:30:00') 支持 不支持
from_utc_timestamp(ts, tz) 在给定时区中解释 UTC 时间戳 SELECT from_utc_timestamp(ts, 'America/Los_Angeles') 支持 不支持
to_utc_timestamp(ts, tz) 将给定时区的时间戳转换为 UTC SELECT to_utc_timestamp(ts, 'America/Los_Angeles') 支持 不支持
dayofmonth(date) 从日期中提取月份中的日(day 的别名) SELECT dayofmonth(order_date) FROM table 支持 不支持
dayofweek(date) 星期几(1 = 星期日,7 = 星期六) SELECT dayofweek(order_date) FROM table 支持 不支持
weekday(date) 星期几(0 = 星期一,6 = 星期日) SELECT weekday(order_date) FROM table 支持 不支持
dayofyear(date) 一年中的第几天(1 到 366) SELECT dayofyear(order_date) FROM table 支持 不支持
weekofyear(date) ISO 周数 SELECT weekofyear(order_date) FROM table 支持 不支持
last_day(date) 包含该日期的月份的最后一天 SELECT last_day(order_date) FROM table 支持 不支持
next_day(start_date, day_of_week) start_date 之后第一个指定星期几的日期 SELECT next_day(order_date, 'Mon') FROM table 支持 不支持
date_diff(date1, date2) 两个日期相减 SELECT date_diff('2020-01-01', '2020-01-02') 支持 支持
months_between(ts1, ts2 [, roundOff]) 两个时间戳之间的月份数(小数)(除非 roundOff 为 false,否则四舍五入到八位) SELECT months_between('1997-02-28 10:30:00', '1996-10-30') 支持 不支持
timestamp_seconds(seconds) 从 UTC 纪元以来的秒数创建时间戳(接受小数值) SELECT timestamp_seconds(1230219000) 支持 不支持
timestamp_millis(millis) 从 UTC 纪元以来的毫秒数创建时间戳 SELECT timestamp_millis(1230219000123) 支持 不支持
timestamp_micros(micros) 从 UTC 纪元以来的微秒数创建时间戳 SELECT timestamp_micros(1230219000123123) 支持 不支持
unix_seconds(timestamp) 自 1970-01-01 UTC 以来的秒数(截断更高精度) SELECT unix_seconds(current_timestamp()) 支持 不支持
unix_millis(timestamp) 自 1970-01-01 UTC 以来的毫秒数(截断更高精度) SELECT unix_millis(current_timestamp()) 支持 不支持
unix_micros(timestamp) 自 1970-01-01 UTC 以来的微秒数 SELECT unix_micros(current_timestamp()) 支持 不支持

数学函数

函数 描述 示例 Furnace Ontology
abs(x) 绝对值 SELECT abs(-5) 支持 支持
acos(x) 反余弦 SELECT acos(0.5) 支持 不支持
asin(x) 反正弦 SELECT asin(0.5) 支持 不支持
atan(x) 反正切 SELECT atan(1) 支持 不支持
atan2(y, x) y/x 的反正切 SELECT atan2(1, 1) 支持 不支持
cbrt(x) 立方根 SELECT cbrt(27) 支持 不支持
ceil(x) 向上取整到最接近的整数 SELECT ceil(3.7) 支持 支持
ceiling(x) 向上取整到最接近的整数 SELECT ceiling(3.7) 支持 支持
cos(x) 余弦 SELECT cos(pi()) 支持