建议在同一 Region 内导入,避免公网传输费用。同一 Region 且同一云厂商内,数据传输通过内网进行。
使用示例
1. 从 User Volume 导入
CREATE TABLE birds (
id INT,
name VARCHAR(50),
wingspan_cm FLOAT,
colors STRING
);
PUT '/Users/Downloads/data.csv' TO USER VOLUME FILE 'data.csv';
COPY INTO birds FROM USER VOLUME
USING csv
OPTIONS('header'='true')
FILES ('data.csv')
PURGE = TRUE;
2. 从 Table Volume 导入
CREATE TABLE birds (
id INT,
name VARCHAR(50),
wingspan_cm FLOAT,
colors STRING
);
PUT '/Users/Downloads/data.csv' TO TABLE VOLUME birds FILE 'data.csv';
COPY INTO birds FROM TABLE VOLUME birds
USING csv
OPTIONS('header'='true')
FILES ('data.csv')
PURGE = TRUE;
3. 从 OSS 导入
CREATE TABLE birds (
id INT,
name VARCHAR(50),
wingspan_cm FLOAT,
colors STRING
);
CREATE STORAGE CONNECTION catalog_storage_oss
TYPE OSS
ACCESS_ID = 'xxxx'
ACCESS_KEY = 'xxxxxxx'
ENDPOINT = 'oss-cn-hangzhou-internal.aliyuncs.com';
CREATE EXTERNAL VOLUME my_volume
LOCATION 'oss://mybucket/test_insert/'
USING CONNECTION catalog_storage_oss
DIRECTORY = (ENABLE = TRUE, AUTO_REFRESH = TRUE);
COPY INTO birds FROM VOLUME my_volume
USING csv
SUBDIRECTORY 'dau_unload/read/';
4. 从 COS 导入
CREATE TABLE birds (
id INT,
name VARCHAR(50),
wingspan_cm FLOAT,
colors STRING
);
CREATE STORAGE CONNECTION my_conn
TYPE COS
ACCESS_KEY = '<access_key>'
SECRET_KEY = '<secret_key>'
REGION = 'ap-shanghai'
APP_ID = '1310000503';
CREATE EXTERNAL VOLUME my_volume
LOCATION 'cos://mybucket/test_insert/'
USING CONNECTION my_conn
DIRECTORY = (ENABLE = TRUE, AUTO_REFRESH = TRUE);
COPY INTO birds FROM VOLUME my_volume
USING csv
SUBDIRECTORY 'dau_unload/read/';
5. 从 S3 导入
CREATE TABLE birds (
id INT,
name VARCHAR(50),
wingspan_cm FLOAT,
colors STRING
);
CREATE STORAGE CONNECTION aws_bj_conn
TYPE S3
ACCESS_KEY = 'AKIAQNBSBP6EIJE33***'
SECRET_KEY = '7kfheDrmq***************************'
ENDPOINT = 's3.cn-north-1.amazonaws.com.cn'
REGION = 'cn-north-1';
CREATE EXTERNAL VOLUME my_volume
LOCATION 's3://mybucket/test_insert/'
USING CONNECTION aws_bj_conn
DIRECTORY = (ENABLE = TRUE, AUTO_REFRESH = TRUE);
COPY INTO birds FROM VOLUME my_volume
USING csv
SUBDIRECTORY 'dau_unload/read/';
CREATE TABLE IF NOT EXISTS doc_copy_transform (id INT, name_upper STRING);
-- 从竖线分隔的 CSV 导入,同时做类型转换和大写处理
-- 注意:VOLUME 子句中必须声明列名和类型
COPY INTO doc_copy_transform FROM (
SELECT CAST(col0 AS INT), UPPER(col1)
FROM USER VOLUME(col0 STRING, col1 STRING, col2 STRING)
USING CSV
OPTIONS('sep'='|')
FILES('data.csv')
);
SELECT * FROM doc_copy_transform ORDER BY id;
+----+------------+
| id | name_upper |
+----+------------+
| 1 | ALICE |
| 2 | BOB |
| 3 | CAROL |
+----+------------+
CREATE TABLE IF NOT EXISTS orders (id INT, customer STRING, amount INT);
-- 文件内容示例:
-- 1|Alice|500
-- 2|Bob|300
COPY INTO orders FROM USER VOLUME
USING CSV
OPTIONS('sep'='|')
FILES('orders.csv');
SELECT * FROM orders ORDER BY id;
+----+----------+--------+
| id | customer | amount |
+----+----------+--------+
| 1 | Alice | 500 |
| 2 | Bob | 300 |
+----+----------+--------+
12. 带 header 的 CSV 导入(header 行自动跳过)
CREATE TABLE IF NOT EXISTS employees (id INT, name STRING, score INT);
-- 文件内容示例(第一行为 header):
-- id,name,score
-- 1,Alice,95
-- 2,Bob,87
COPY INTO employees FROM USER VOLUME
USING CSV
OPTIONS('header'='true')
FILES('employees.csv');
SELECT * FROM employees ORDER BY id;
+----+-------+-------+
| id | name | score |
+----+-------+-------+
| 1 | Alice | 95 |
| 2 | Bob | 87 |
+----+-------+-------+
13. gzip 压缩 CSV 导入
CREATE TABLE IF NOT EXISTS logs (ts STRING, level STRING, message STRING);
COPY INTO logs FROM VOLUME my_volume
USING CSV
OPTIONS('header'='true', 'compression'='gzip')
SUBDIRECTORY 'logs/2024/';
14. JSON 数组导入
CREATE TABLE IF NOT EXISTS events (id INT, name STRING);
-- 文件内容示例(JSON 数组格式):
-- [{"id":1,"name":"login"},{"id":2,"name":"logout"}]
COPY INTO events FROM USER VOLUME
USING JSON
OPTIONS('explodeArray'='true')
FILES('events.json');
SELECT * FROM events ORDER BY id;
+----+--------+
| id | name |
+----+--------+
| 1 | login |
| 2 | logout |
+----+--------+
相关指南
文件批量导入导出:COPY INTO 的完整使用场景,含 CSV/Parquet/JSON 格式、正则匹配、错误处理