빅쿼리 지리공간 함수
지리공간 데이터 분석
BigQuery에서 지리공간 데이터를 처리하고 분석하는 방법을 다루는 종합 가이드입니다.
목차
1. 지리공간 데이터 개요
1.1 지리공간 데이터 타입
BigQuery는 GEOGRAPHY 데이터 타입을 지원하여 점, 선, 면 등의 지리적 데이터를 저장할 수 있습니다.
-- 기본 지리공간 데이터 생성
SELECT
ST_GEOGPOINT(-74.006, 40.7128) as nyc_location, -- 점 (경도, 위도)
ST_GEOGFROMTEXT('LINESTRING(-74.006 40.7128, -73.935 40.7614)') as route, -- 선
ST_GEOGFROMTEXT('POLYGON((-74.1 40.6, -73.9 40.6, -73.9 40.8, -74.1 40.8, -74.1 40.6))') as area -- 면
1.2 좌표계 이해
BigQuery는 WGS84 좌표계를 사용합니다 (경도/위도).
-- 좌표 형식 예제
WITH locations AS (
SELECT
'New York' as city,
ST_GEOGPOINT(-74.006, 40.7128) as location -- 경도, 위도 순서
UNION ALL
SELECT
'London',
ST_GEOGPOINT(-0.1278, 51.5074)
UNION ALL
SELECT
'Tokyo',
ST_GEOGPOINT(139.6917, 35.6895)
)
SELECT
city,
location,
ST_X(location) as longitude, -- 경도 추출
ST_Y(location) as latitude -- 위도 추출
FROM locations;
2. 기본 지리공간 함수
2.1 점(Point) 생성 및 조작
-- 다양한 방법으로 점 생성
WITH point_examples AS (
SELECT
-- 경도/위도로 점 생성
ST_GEOGPOINT(-73.9857, 40.7484) as times_square,
-- WKT(Well-Known Text)에서 생성
ST_GEOGFROMTEXT('POINT(-73.9857 40.7484)') as times_square_wkt,
-- GeoJSON에서 생성
ST_GEOGFROMGEOJSON('{"type": "Point", "coordinates": [-73.9857, 40.7484]}') as times_square_geojson
)
SELECT
times_square,
-- 점에서 좌표 추출
ST_X(times_square) as longitude,
ST_Y(times_square) as latitude,
-- WKT 형식으로 변환
ST_ASTEXT(times_square) as as_wkt,
-- GeoJSON 형식으로 변환
ST_ASGEOJSON(times_square) as as_geojson
FROM point_examples;
2.2 선(LineString) 및 면(Polygon) 생성
-- 선과 면 생성 예제
WITH geometry_examples AS (
SELECT
-- 선 생성 (경로)
ST_MAKELINE([
ST_GEOGPOINT(-73.9857, 40.7484), -- Times Square
ST_GEOGPOINT(-73.9776, 40.7505), -- 근처 지점
ST_GEOGPOINT(-73.9712, 40.7527) -- 또 다른 지점
]) as route,
-- 면 생성 (Central Park 대략적 경계)
ST_GEOGFROMTEXT('''
POLYGON((
-73.9733 40.7648,
-73.9582 40.7648,
-73.9582 40.8003,
-73.9733 40.8003,
-73.9733 40.7648
))
''') as central_park_area
)
SELECT
route,
central_park_area,
-- 선의 길이 (미터)
ST_LENGTH(route) as route_length_meters,
-- 면의 넓이 (제곱미터)
ST_AREA(central_park_area) as park_area_sqm,
-- 면의 둘레 (미터)
ST_PERIMETER(central_park_area) as park_perimeter_meters
FROM geometry_examples;
2.3 복합 도형(Multi-geometry) 처리
-- 여러 점을 하나의 도형으로 결합
WITH store_locations AS (
SELECT 'NYC_STORES' as store_group,
ST_UNION_AGG(store_location) as all_stores_combined
FROM (
SELECT ST_GEOGPOINT(-73.9857, 40.7484) as store_location -- Store 1
UNION ALL
SELECT ST_GEOGPOINT(-73.9776, 40.7505) -- Store 2
UNION ALL
SELECT ST_GEOGPOINT(-73.9712, 40.7527) -- Store 3
)
)
SELECT
store_group,
all_stores_combined,
-- 결합된 도형의 경계 상자
ST_BOUNDINGBOX(all_stores_combined) as bounding_box,
-- 결합된 도형의 중심점
ST_CENTROID(all_stores_combined) as centroid
FROM store_locations;
3. 거리 및 면적 계산
3.1 거리 계산
-- 두 지점 간 거리 계산
WITH distance_examples AS (
SELECT
ST_GEOGPOINT(-74.006, 40.7128) as point_a, -- NYC
ST_GEOGPOINT(-0.1278, 51.5074) as point_b -- London
)
SELECT
-- 두 점 간 직선 거리 (미터)
ST_DISTANCE(point_a, point_b) as distance_meters,
-- 킬로미터로 변환
ST_DISTANCE(point_a, point_b) / 1000 as distance_km,
-- 마일로 변환
ST_DISTANCE(point_a, point_b) / 1609.344 as distance_miles
FROM distance_examples;
3.2 반경 내 검색
-- 특정 지점에서 반경 내 검색
WITH locations AS (
SELECT 'Central Park' as name, ST_GEOGPOINT(-73.9654, 40.7829) as location
UNION ALL SELECT 'Times Square', ST_GEOGPOINT(-73.9857, 40.7484)
UNION ALL SELECT 'Brooklyn Bridge', ST_GEOGPOINT(-73.9969, 40.7061)
UNION ALL SELECT 'Statue of Liberty', ST_GEOGPOINT(-74.0445, 40.6892)
),
search_center AS (
SELECT ST_GEOGPOINT(-73.9857, 40.7484) as center -- Times Square 중심
)
SELECT
l.name,
l.location,
ST_DISTANCE(l.location, s.center) as distance_meters,
ST_DISTANCE(l.location, s.center) / 1000 as distance_km
FROM locations l
CROSS JOIN search_center s
WHERE ST_DISTANCE(l.location, s.center) <= 5000 -- 5km 반경 내
ORDER BY distance_meters;
3.3 최근접 지점 찾기
-- 각 고객에게 가장 가까운 매장 찾기
WITH customers AS (
SELECT 'Customer_A' as customer_id, ST_GEOGPOINT(-73.9857, 40.7484) as location
UNION ALL SELECT 'Customer_B', ST_GEOGPOINT(-73.9712, 40.7527)
UNION ALL SELECT 'Customer_C', ST_GEOGPOINT(-74.0059, 40.7128)
),
stores AS (
SELECT 'Store_1' as store_id, ST_GEOGPOINT(-73.9776, 40.7505) as location
UNION ALL SELECT 'Store_2', ST_GEOGPOINT(-73.9969, 40.7061)
UNION ALL SELECT 'Store_3', ST_GEOGPOINT(-74.0445, 40.6892)
),
distances AS (
SELECT
c.customer_id,
s.store_id,
ST_DISTANCE(c.location, s.location) as distance_meters,
ROW_NUMBER() OVER (
PARTITION BY c.customer_id
ORDER BY ST_DISTANCE(c.location, s.location)
) as distance_rank
FROM customers c
CROSS JOIN stores s
)
SELECT
customer_id,
store_id as nearest_store,
distance_meters,
ROUND(distance_meters / 1000, 2) as distance_km
FROM distances
WHERE distance_rank = 1
ORDER BY customer_id;
4. 공간 관계 분석
4.1 포함 관계 (Contains/Within)
-- 지역 내 포함 관계 분석
WITH districts AS (
SELECT
'Manhattan' as district,
ST_GEOGFROMTEXT('''
POLYGON((
-74.0479 40.6829,
-73.9067 40.6829,
-73.9067 40.8820,
-74.0479 40.8820,
-74.0479 40.6829
))
''') as boundary
),
points_of_interest AS (
SELECT 'Times Square' as poi, ST_GEOGPOINT(-73.9857, 40.7484) as location
UNION ALL SELECT 'Central Park', ST_GEOGPOINT(-73.9654, 40.7829)
UNION ALL SELECT 'Brooklyn Bridge', ST_GEOGPOINT(-73.9969, 40.7061)
UNION ALL SELECT 'JFK Airport', ST_GEOGPOINT(-73.7781, 40.6413) -- Queens
)
SELECT
poi.poi,
d.district,
ST_CONTAINS(d.boundary, poi.location) as is_within_district,
ST_DISTANCE(ST_CENTROID(d.boundary), poi.location) as distance_from_center
FROM points_of_interest poi
CROSS JOIN districts d
ORDER BY poi.poi;
4.2 교차 및 중첩 분석
-- 배송 구역과 행정구역 중첩 분석
WITH delivery_zones AS (
SELECT
'Zone_A' as zone_id,
ST_BUFFER(ST_GEOGPOINT(-73.9857, 40.7484), 2000) as coverage_area -- 2km 반경
UNION ALL
SELECT
'Zone_B',
ST_BUFFER(ST_GEOGPOINT(-73.9654, 40.7829), 1500) -- 1.5km 반경
),
neighborhoods AS (
SELECT
'Midtown' as neighborhood,
ST_GEOGFROMTEXT('''
POLYGON((
-73.9912 40.7489,
-73.9734 40.7489,
-73.9734 40.7670,
-73.9912 40.7670,
-73.9912 40.7489
))
''') as boundary
)
SELECT
dz.zone_id,
n.neighborhood,
-- 교차 여부
ST_INTERSECTS(dz.coverage_area, n.boundary) as zones_intersect,
-- 교차 면적
ST_AREA(ST_INTERSECTION(dz.coverage_area, n.boundary)) as intersection_area_sqm,
-- 커버리지 비율
ST_AREA(ST_INTERSECTION(dz.coverage_area, n.boundary)) /
ST_AREA(n.boundary) * 100 as coverage_percentage
FROM delivery_zones dz
CROSS JOIN neighborhoods n
WHERE ST_INTERSECTS(dz.coverage_area, n.boundary);
4.3 버퍼 및 영향권 분석
-- 지하철역 영향권 분석
WITH subway_stations AS (
SELECT 'Times Sq-42 St' as station, ST_GEOGPOINT(-73.9857, 40.7590) as location
UNION ALL SELECT 'Union Square', ST_GEOGPOINT(-73.9903, 40.7359)
UNION ALL SELECT 'Grand Central', ST_GEOGPOINT(-73.9772, 40.7527)
),
businesses AS (
SELECT 'Restaurant_A' as business, ST_GEOGPOINT(-73.9850, 40.7580) as location
UNION ALL SELECT 'Hotel_B', ST_GEOGPOINT(-73.9900, 40.7350)
UNION ALL SELECT 'Office_C', ST_GEOGPOINT(-73.9780, 40.7530)
)
SELECT
b.business,
ss.station,
ST_DISTANCE(b.location, ss.location) as distance_to_station,
-- 300m 이내 여부 (도보 3-4분)
ST_DISTANCE(b.location, ss.location) <= 300 as within_walking_distance,
-- 영향권 등급
CASE
WHEN ST_DISTANCE(b.location, ss.location) <= 200 THEN 'Premium'
WHEN ST_DISTANCE(b.location, ss.location) <= 500 THEN 'High'
WHEN ST_DISTANCE(b.location, ss.location) <= 1000 THEN 'Medium'
ELSE 'Low'
END as proximity_grade
FROM businesses b
CROSS JOIN subway_stations ss
QUALIFY ROW_NUMBER() OVER (
PARTITION BY b.business
ORDER BY ST_DISTANCE(b.location, ss.location)
) = 1 -- 가장 가까운 역만
ORDER BY b.business;
5. 지오코딩과 주소 변환
5.1 주소를 좌표로 변환
-- 주소 문자열을 지리좌표로 변환 (외부 API 연동 필요)
WITH addresses AS (
SELECT 'Empire State Building, New York, NY' as address
UNION ALL SELECT '1600 Amphitheatre Parkway, Mountain View, CA'
UNION ALL SELECT 'Times Square, New York, NY'
)
SELECT
address,
-- 실제 지오코딩은 외부 서비스 필요 (Google Maps API 등)
CASE
WHEN address LIKE '%Empire State%' THEN ST_GEOGPOINT(-73.9857, 40.7484)
WHEN address LIKE '%Amphitheatre Parkway%' THEN ST_GEOGPOINT(-122.0840, 37.4220)
WHEN address LIKE '%Times Square%' THEN ST_GEOGPOINT(-73.9857, 40.7590)
END as coordinates
FROM addresses;
5.2 좌표를 주소로 변환 (역지오코딩)
-- 좌표 기반 지역 정보 매칭
WITH coordinate_points AS (
SELECT
ST_GEOGPOINT(-73.9857, 40.7484) as location,
'Point_A' as point_id
UNION ALL
SELECT
ST_GEOGPOINT(-122.4194, 37.7749),
'Point_B'
),
zip_code_boundaries AS (
SELECT
'10001' as zip_code,
'Manhattan, NY' as area_name,
ST_GEOGFROMTEXT('''
POLYGON((
-73.9912 40.7400,
-73.9800 40.7400,
-73.9800 40.7600,
-73.9912 40.7600,
-73.9912 40.7400
))
''') as boundary
UNION ALL
SELECT
'94102',
'San Francisco, CA',
ST_GEOGFROMTEXT('''
POLYGON((
-122.4300 37.7700,
-122.4100 37.7700,
-122.4100 37.7800,
-122.4300 37.7800,
-122.4300 37.7700
))
''')
)
SELECT
cp.point_id,
cp.location,
zb.zip_code,
zb.area_name,
ST_DISTANCE(ST_CENTROID(zb.boundary), cp.location) as distance_from_center
FROM coordinate_points cp
JOIN zip_code_boundaries zb ON ST_CONTAINS(zb.boundary, cp.location)
ORDER BY cp.point_id;
6. 실제 활용 사례
6.1 배송 경로 최적화
-- 배송 경로 최적화 분석
WITH delivery_orders AS (
SELECT
'ORDER001' as order_id,
ST_GEOGPOINT(-73.9857, 40.7484) as delivery_address,
'10:00' as time_window_start,
'12:00' as time_window_end
UNION ALL
SELECT 'ORDER002', ST_GEOGPOINT(-73.9776, 40.7505), '11:00', '13:00'
UNION ALL SELECT 'ORDER003', ST_GEOGPOINT(-73.9712, 40.7527), '09:00', '11:00'
UNION ALL SELECT 'ORDER004', ST_GEOGPOINT(-73.9654, 40.7829), '14:00', '16:00'
),
warehouse_location AS (
SELECT ST_GEOGPOINT(-73.9969, 40.7061) as location
),
route_analysis AS (
SELECT
do.order_id,
do.delivery_address,
do.time_window_start,
ST_DISTANCE(wh.location, do.delivery_address) as distance_from_warehouse,
-- 다른 주문들과의 거리 계산 (클러스터링용)
ARRAY_AGG(
STRUCT(
do2.order_id as nearby_order,
ST_DISTANCE(do.delivery_address, do2.delivery_address) as distance
)
ORDER BY ST_DISTANCE(do.delivery_address, do2.delivery_address)
LIMIT 3
) as nearby_orders
FROM delivery_orders do
CROSS JOIN warehouse_location wh
LEFT JOIN delivery_orders do2 ON do.order_id != do2.order_id
GROUP BY do.order_id, do.delivery_address, do.time_window_start, distance_from_warehouse
)
SELECT
order_id,
delivery_address,
distance_from_warehouse / 1000 as distance_km,
-- 배송 우선순위 (거리 + 시간창 기준)
CASE
WHEN distance_from_warehouse <= 2000 AND time_window_start <= '11:00' THEN 'High Priority'
WHEN distance_from_warehouse <= 5000 THEN 'Medium Priority'
ELSE 'Low Priority'
END as delivery_priority,
nearby_orders
FROM route_analysis
ORDER BY distance_from_warehouse;
6.2 상권 분석
-- 상권 분석을 위한 지리공간 분석
WITH commercial_areas AS (
SELECT
'Midtown' as area_name,
ST_GEOGFROMTEXT('''
POLYGON((
-73.9912 40.7489,
-73.9734 40.7489,
-73.9734 40.7670,
-73.9912 40.7670,
-73.9912 40.7489
))
''') as boundary
UNION ALL
SELECT
'SoHo',
ST_GEOGFROMTEXT('''
POLYGON((
-74.0059 40.7193,
-73.9935 40.7193,
-73.9935 40.7284,
-74.0059 40.7284,
-74.0059 40.7193
))
''')
),
competitors AS (
SELECT 'Competitor_A' as name, ST_GEOGPOINT(-73.9857, 40.7590) as location, 'Restaurant' as type
UNION ALL SELECT 'Competitor_B', ST_GEOGPOINT(-73.9903, 40.7359), 'Restaurant'
UNION ALL SELECT 'Competitor_C', ST_GEOGPOINT(-74.0000, 40.7250), 'Cafe'
),
foot_traffic_points AS (
SELECT ST_GEOGPOINT(-73.9857, 40.7590) as location, 1000 as daily_visitors -- Times Square
UNION ALL SELECT ST_GEOGPOINT(-73.9772, 40.7527), 800 -- Grand Central
UNION ALL SELECT ST_GEOGPOINT(-74.0059, 40.7193), 600 -- SoHo area
),
potential_locations AS (
SELECT 'Location_1' as site, ST_GEOGPOINT(-73.9850, 40.7600) as location
UNION ALL SELECT 'Location_2', ST_GEOGPOINT(-74.0000, 40.7200)
UNION ALL SELECT 'Location_3', ST_GEOGPOINT(-73.9900, 40.7400)
)
SELECT
pl.site,
ca.area_name,
-- 경쟁사 분석
(SELECT COUNT(*)
FROM competitors c
WHERE ST_DISTANCE(pl.location, c.location) <= 500) as competitors_within_500m,
(SELECT AVG(ST_DISTANCE(pl.location, c.location))
FROM competitors c
WHERE ST_DISTANCE(pl.location, c.location) <= 1000) as avg_competitor_distance,
-- 유동인구 점수
(SELECT SUM(ftp.daily_visitors / (ST_DISTANCE(pl.location, ftp.location) / 100 + 1))
FROM foot_traffic_points ftp
WHERE ST_DISTANCE(pl.location, ftp.location) <= 1000) as foot_traffic_score,
-- 종합 점수
CASE
WHEN (SELECT COUNT(*) FROM competitors c WHERE ST_DISTANCE(pl.location, c.location) <= 300) = 0
AND ca.area_name = 'Midtown' THEN 'Excellent'
WHEN (SELECT COUNT(*) FROM competitors c WHERE ST_DISTANCE(pl.location, c.location) <= 500) <= 2
THEN 'Good'
ELSE 'Fair'
END as location_grade
FROM potential_locations pl
LEFT JOIN commercial_areas ca ON ST_CONTAINS(ca.boundary, pl.location)
ORDER BY foot_traffic_score DESC;
6.3 부동산 가격 예측을 위한 지리 특성
-- 부동산 가격에 영향을 미치는 지리적 요소 분석
WITH properties AS (
SELECT
'PROP001' as property_id,
ST_GEOGPOINT(-73.9857, 40.7484) as location,
850000 as price
UNION ALL
SELECT 'PROP002', ST_GEOGPOINT(-73.9776, 40.7505), 1200000
UNION ALL
SELECT 'PROP003', ST_GEOGPOINT(-73.9712, 40.7527), 950000
),
amenities AS (
SELECT 'Subway' as type, ST_GEOGPOINT(-73.9857, 40.7590) as location, 100 as importance_weight
UNION ALL SELECT 'School', ST_GEOGPOINT(-73.9800, 40.7550), 80
UNION ALL SELECT 'Park', ST_GEOGPOINT(-73.9654, 40.7829), 60
UNION ALL SELECT 'Hospital', ST_GEOGPOINT(-73.9900, 40.7400), 70
UNION ALL SELECT 'Shopping', ST_GEOGPOINT(-73.9857, 40.7484), 50
),
property_features AS (
SELECT
p.property_id,
p.location,
p.price,
-- 각 편의시설까지의 거리 기반 점수 계산
SUM(
CASE
WHEN ST_DISTANCE(p.location, a.location) <= 200 THEN a.importance_weight
WHEN ST_DISTANCE(p.location, a.location) <= 500 THEN a.importance_weight * 0.7
WHEN ST_DISTANCE(p.location, a.location) <= 1000 THEN a.importance_weight * 0.4
ELSE 0
END
) as amenity_score,
-- 가장 가까운 지하철역까지의 거리
MIN(
CASE WHEN a.type = 'Subway' THEN ST_DISTANCE(p.location, a.location) END
) as nearest_subway_distance,
-- 공원 접근성
MIN(
CASE WHEN a.type = 'Park' THEN ST_DISTANCE(p.location, a.location) END
) as nearest_park_distance
FROM properties p
CROSS JOIN amenities a
GROUP BY p.property_id, p.location, p.price
)
SELECT
property_id,
price,
amenity_score,
ROUND(nearest_subway_distance) as subway_distance_m,
ROUND(nearest_park_distance) as park_distance_m,
-- 가격 대비 편의성 점수
ROUND(amenity_score / (price / 100000), 2) as value_score,
-- 투자 등급
CASE
WHEN amenity_score > 200 AND nearest_subway_distance < 300 THEN 'Premium'
WHEN amenity_score > 150 AND nearest_subway_distance < 500 THEN 'High'
WHEN amenity_score > 100 THEN 'Medium'
ELSE 'Basic'
END as investment_grade
FROM property_features
ORDER BY value_score DESC;
7. 성능 최적화
7.1 지리공간 인덱싱
-- 클러스터링을 통한 지리공간 쿼리 성능 향상
CREATE OR REPLACE TABLE `project.geo.optimized_locations` (
location_id STRING,
location_name STRING,
coordinates GEOGRAPHY,
category STRING,
created_date DATE
)
PARTITION BY created_date
CLUSTER BY ST_GEOHASH(coordinates, 10); -- 지리해시 기반 클러스터링
7.2 공간 필터링 최적화
-- 효율적인 공간 범위 쿼리
WITH search_area AS (
-- 검색 영역을 먼저 정의
SELECT ST_GEOGFROMTEXT('''
POLYGON((
-74.0059 40.7193,
-73.9735 40.7193,
-73.9735 40.7670,
-74.0059 40.7670,
-74.0059 40.7193
))
''') as boundary
),
-- 바운딩 박스를 이용한 사전 필터링
candidates AS (
SELECT *
FROM `project.geo.locations`
WHERE
-- 바운딩 박스로 먼저 필터링 (인덱스 활용)
ST_X(coordinates) BETWEEN -74.0059 AND -73.9735
AND ST_Y(coordinates) BETWEEN 40.7193 AND 40.7670
)
-- 정확한 공간 관계 확인
SELECT
c.location_id,
c.location_name,
c.coordinates
FROM candidates c
CROSS JOIN search_area sa
WHERE ST_CONTAINS(sa.boundary, c.coordinates)
ORDER BY c.location_id;
7.3 대용량 지리공간 데이터 처리
-- 대용량 지리공간 데이터 배치 처리
CREATE OR REPLACE PROCEDURE `project.geo.process_large_dataset`()
BEGIN
DECLARE batch_size INT64 DEFAULT 100000;
DECLARE processed_count INT64 DEFAULT 0;
DECLARE total_count INT64;
-- 전체 레코드 수 확인
SET total_count = (
SELECT COUNT(*)
FROM `project.raw.gps_tracking_data`
);
-- 배치 단위로 처리
WHILE processed_count < total_count DO
-- 지리해시 계산 및 공간 집계 처리
INSERT INTO `project.geo.processed_tracking` (
geohash,
point_count,
avg_coordinates,
bounding_box,
processing_batch
)
SELECT
ST_GEOHASH(coordinates, 8) as geohash,
COUNT(*) as point_count,
ST_CENTROID(ST_UNION_AGG(coordinates)) as avg_coordinates,
ST_BOUNDINGBOX(ST_UNION_AGG(coordinates)) as bounding_box,
FLOOR(processed_count / batch_size) as processing_batch
FROM (
SELECT coordinates
FROM `project.raw.gps_tracking_data`
ORDER BY timestamp
LIMIT batch_size OFFSET processed_count
)
GROUP BY ST_GEOHASH(coordinates, 8);
SET processed_count = processed_count + batch_size;
-- 진행 상황 로깅
INSERT INTO `project.logs.processing_progress` (
timestamp,
processed_records,
total_records,
progress_percentage
) VALUES (
CURRENT_TIMESTAMP(),
processed_count,
total_count,
processed_count / total_count * 100
);
END WHILE;
END;
BigQuery의 지리공간 함수를 활용하면 위치 기반 분석, 경로 최적화, 상권 분석 등 다양한 지리적 인사이트를 도출할 수 있습니다. 적절한 클러스터링과 인덱싱을 통해 대용량 지리공간 데이터도 효율적으로 처리할 수 있습니다.