-- ─────────────────────────────────────────────────────────────────
-- fandata.ai — TimescaleDB Schema
--
-- WHY TIMESCALEDB OVER ALTERNATIVES:
--
-- MongoDB:     Good for flexible docs but poor for time-series queries
--              like "give me 30-day rolling average streams per market"
--
-- Plain PostgreSQL: Works, but slow at scale for time-series range scans
--
-- ClickHouse:  Very fast for analytics but complex to operate, overkill
--              for an early-stage product
--
-- BigQuery:    Great for analytics but expensive and adds GCP dependency
--
-- TimescaleDB: PostgreSQL (familiar SQL) + automatic time partitioning
--              + compression + continuous aggregates. Free self-hosted.
--              Perfect for this use case. Scales to billions of rows.
--
-- INSTALL: CREATE EXTENSION IF NOT EXISTS timescaledb;
-- ─────────────────────────────────────────────────────────────────

CREATE EXTENSION IF NOT EXISTS timescaledb;

-- ─────────────────────────────────────────────
-- CORE: Artists master table
-- ─────────────────────────────────────────────
CREATE TABLE artists (
  id                  SERIAL PRIMARY KEY,
  name                TEXT NOT NULL,
  name_native         TEXT,          -- 한국어 / 日本語 name
  country             CHAR(2),       -- 'KR', 'JP'
  label               TEXT,
  spotify_id          TEXT UNIQUE,
  youtube_channel_id  TEXT UNIQUE,
  chartmetric_id      INTEGER UNIQUE,
  x_search_query      TEXT,          -- e.g. 'NewJeans OR 뉴진스'
  active              BOOLEAN DEFAULT TRUE,
  created_at          TIMESTAMPTZ DEFAULT NOW()
);

-- ─────────────────────────────────────────────
-- SPOTIFY TABLES
-- ─────────────────────────────────────────────
CREATE TABLE spotify_artist_snapshots (
  artist_id    TEXT        NOT NULL,
  artist_name  TEXT,
  followers    INTEGER,
  popularity   SMALLINT,  -- 0–100
  genres       TEXT[],
  collected_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Convert to hypertable (TimescaleDB time-series magic)
SELECT create_hypertable('spotify_artist_snapshots', 'collected_at');

CREATE TABLE spotify_track_features (
  track_id          TEXT PRIMARY KEY,
  danceability      FLOAT,
  energy            FLOAT,
  valence           FLOAT,   -- musical "happiness" 0–1
  tempo             FLOAT,
  acousticness      FLOAT,
  instrumentalness  FLOAT,
  speechiness       FLOAT,
  collected_at      TIMESTAMPTZ DEFAULT NOW()
);

CREATE TABLE spotify_top_tracks (
  artist_id    TEXT,
  track_id     TEXT,
  track_name   TEXT,
  market       CHAR(2),    -- ISO country code
  popularity   SMALLINT,
  collected_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
SELECT create_hypertable('spotify_top_tracks', 'collected_at');

CREATE TABLE spotify_related_artists (
  source_artist_id   TEXT,
  related_artist_id  TEXT,
  related_name       TEXT,
  popularity         SMALLINT,
  genres             TEXT[],
  collected_at       TIMESTAMPTZ DEFAULT NOW(),
  UNIQUE(source_artist_id, related_artist_id)
);

-- ─────────────────────────────────────────────
-- YOUTUBE TABLES
-- ─────────────────────────────────────────────
CREATE TABLE youtube_video_snapshots (
  video_id      TEXT        NOT NULL,
  title         TEXT,
  view_count    BIGINT,
  like_count    INTEGER,
  comment_count INTEGER,
  published_at  TIMESTAMPTZ,
  collected_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
SELECT create_hypertable('youtube_video_snapshots', 'collected_at');

CREATE TABLE youtube_channel_snapshots (
  channel_id       TEXT        NOT NULL,
  channel_name     TEXT,
  subscriber_count BIGINT,
  view_count       BIGINT,
  video_count      INTEGER,
  collected_at     TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
SELECT create_hypertable('youtube_channel_snapshots', 'collected_at');

CREATE TABLE youtube_fan_content (
  video_id      TEXT PRIMARY KEY,
  title         TEXT,
  channel_title TEXT,
  published_at  TIMESTAMPTZ,
  search_query  TEXT,
  collected_at  TIMESTAMPTZ DEFAULT NOW()
);

-- ─────────────────────────────────────────────
-- CHARTMETRIC TABLES
-- ─────────────────────────────────────────────
CREATE TABLE chartmetric_artist_stats (
  cm_artist_id               INTEGER     NOT NULL,
  spotify_followers          BIGINT,
  spotify_popularity         SMALLINT,
  youtube_subscribers        BIGINT,
  youtube_views              BIGINT,
  tiktok_followers           BIGINT,
  tiktok_likes               BIGINT,
  instagram_followers        BIGINT,
  instagram_engagement_rate  FLOAT,
  cm_score                   FLOAT,      -- Chartmetric momentum score
  collected_at               TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
SELECT create_hypertable('chartmetric_artist_stats', 'collected_at');

CREATE TABLE chartmetric_playlist_placements (
  cm_artist_id     INTEGER,
  playlist_id      TEXT,
  playlist_name    TEXT,
  playlist_country CHAR(2),
  follower_count   BIGINT,
  placement_date   DATE,
  collected_at     TIMESTAMPTZ DEFAULT NOW(),
  UNIQUE(cm_artist_id, playlist_id)
);

-- City-level fan distribution — powers the touring heatmap
CREATE TABLE chartmetric_city_fanbase (
  cm_artist_id  INTEGER     NOT NULL,
  city          TEXT,
  country       CHAR(2),
  listeners     INTEGER,
  rank          SMALLINT,
  collected_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
SELECT create_hypertable('chartmetric_city_fanbase', 'collected_at');

-- ─────────────────────────────────────────────
-- X (TWITTER) TABLES
-- ─────────────────────────────────────────────
CREATE TABLE x_mention_snapshots (
  artist_id      INTEGER     NOT NULL,
  search_query   TEXT,
  mention_count  INTEGER,
  total_likes    BIGINT,
  total_retweets BIGINT,
  collected_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
SELECT create_hypertable('x_mention_snapshots', 'collected_at');

CREATE TABLE x_hashtag_snapshots (
  artist_id    INTEGER     NOT NULL,
  hashtag      TEXT,
  volume       INTEGER,
  collected_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
SELECT create_hypertable('x_hashtag_snapshots', 'collected_at');

-- ─────────────────────────────────────────────
-- CONTINUOUS AGGREGATES (TimescaleDB superpower)
-- These auto-refresh and make dashboard queries instant
-- ─────────────────────────────────────────────

-- 7-day rolling Spotify follower growth per artist
CREATE MATERIALIZED VIEW spotify_weekly_growth
WITH (timescaledb.continuous) AS
  SELECT
    artist_id,
    time_bucket('7 days', collected_at) AS week,
    LAST(followers, collected_at)       AS followers_end,
    FIRST(followers, collected_at)      AS followers_start,
    LAST(popularity, collected_at)      AS popularity
  FROM spotify_artist_snapshots
  GROUP BY artist_id, week
WITH NO DATA;

SELECT add_continuous_aggregate_policy('spotify_weekly_growth',
  start_offset => INTERVAL '3 months',
  end_offset   => INTERVAL '1 hour',
  schedule_interval => INTERVAL '1 day'
);

-- Daily X mention volume per artist
CREATE MATERIALIZED VIEW x_daily_mentions
WITH (timescaledb.continuous) AS
  SELECT
    artist_id,
    time_bucket('1 day', collected_at) AS day,
    SUM(mention_count)  AS total_mentions,
    SUM(total_likes)    AS total_likes,
    SUM(total_retweets) AS total_retweets
  FROM x_mention_snapshots
  GROUP BY artist_id, day
WITH NO DATA;

SELECT add_continuous_aggregate_policy('x_daily_mentions',
  start_offset      => INTERVAL '1 month',
  end_offset        => INTERVAL '1 hour',
  schedule_interval => INTERVAL '1 day'
);

-- ─────────────────────────────────────────────
-- INDEXES for dashboard query performance
-- ─────────────────────────────────────────────
CREATE INDEX ON spotify_artist_snapshots (artist_id, collected_at DESC);
CREATE INDEX ON spotify_top_tracks (artist_id, market, collected_at DESC);
CREATE INDEX ON chartmetric_city_fanbase (cm_artist_id, collected_at DESC);
CREATE INDEX ON x_mention_snapshots (artist_id, collected_at DESC);
CREATE INDEX ON youtube_video_snapshots (video_id, collected_at DESC);

-- ─────────────────────────────────────────────
-- SAMPLE QUERY: Dashboard fan engagement score
-- Combines Spotify + YouTube + X into one engagement index
-- ─────────────────────────────────────────────
/*
SELECT
  a.name,
  s.followers                                      AS spotify_followers,
  s.popularity                                     AS spotify_popularity,
  yc.subscriber_count                              AS youtube_subscribers,
  xm.mention_count                                 AS x_mentions_today,
  -- Simple composite engagement score (customise weights)
  (s.popularity * 1000
    + (s.followers / 1000)
    + (yc.subscriber_count / 10000)
    + (xm.mention_count * 5)
  )                                                AS engagement_score
FROM artists a
JOIN LATERAL (
  SELECT followers, popularity
  FROM spotify_artist_snapshots
  WHERE artist_id = a.spotify_id
  ORDER BY collected_at DESC LIMIT 1
) s ON TRUE
JOIN LATERAL (
  SELECT subscriber_count
  FROM youtube_channel_snapshots
  WHERE channel_id = a.youtube_channel_id
  ORDER BY collected_at DESC LIMIT 1
) yc ON TRUE
JOIN LATERAL (
  SELECT mention_count
  FROM x_mention_snapshots
  WHERE artist_id = a.id
    AND collected_at > NOW() - INTERVAL '24 hours'
  ORDER BY collected_at DESC LIMIT 1
) xm ON TRUE
ORDER BY engagement_score DESC;
*/
