#!/usr/bin/env node

/**
 * Survey Data Migration Script
 *
 * One-time utility to normalize existing Mixpanel user properties
 * for the industry and use case fields. This addresses the proliferation
 * of one-off categories that makes analytics breakdowns difficult.
 *
 * Usage: pnpm ts-node scripts/survey-data-migration.ts
 *
 * IMPORTANT: As written, this script only runs a dry-run simulation against
 * built-in sample data and prints what it would change. A real migration
 * requires Mixpanel Data Management API access and appropriate production
 * credentials.
 */

/* eslint-disable no-console */

import {
  normalizeIndustry,
  normalizeUseCase
} from '../src/platform/telemetry/utils/surveyNormalization'

/** Minimal shape of a Mixpanel user profile as consumed by this script. */
interface MixpanelUser {
  $distinct_id: string
  $properties: {
    industry?: string
    useCase?: string
    [key: string]: any
  }
}

/** Counters and review lists produced by one simulated migration run. */
interface MigrationStats {
  /** Number of profiles inspected. */
  totalUsers: number
  /** Profiles whose industry value differs from its normalized form. */
  industryNormalized: number
  /** Profiles whose use case value differs from its normalized form. */
  useCaseNormalized: number
  /** Raw industry values that fell through to the "Uncategorized:" bucket. */
  uncategorizedIndustries: Set<string>
  /** Raw use case values that fell through to the "Uncategorized:" bucket. */
  uncategorizedUseCases: Set<string>
}

/** Prefix the normalizers put on values they could not categorize. */
const UNCATEGORIZED_PREFIX = 'Uncategorized:'

/**
 * Simulate the data migration process.
 *
 * For every user, computes the normalized industry / use case values and logs
 * the property updates that would be written. Nothing is sent anywhere; in
 * production this step would call the Mixpanel Data Management API instead of
 * logging.
 *
 * @param users Profiles to inspect. The array and its items are not mutated.
 * @returns Aggregate counts plus the distinct raw values that need review.
 */
function simulateMigration(users: MixpanelUser[]): MigrationStats {
  const stats: MigrationStats = {
    totalUsers: users.length,
    industryNormalized: 0,
    useCaseNormalized: 0,
    uncategorizedIndustries: new Set<string>(),
    uncategorizedUseCases: new Set<string>()
  }

  for (const user of users) {
    const updates: Record<string, string> = {}

    // Industry: only record an update when normalization changes the value.
    const industry = user.$properties.industry
    if (industry) {
      const normalized = normalizeIndustry(industry)
      if (normalized !== industry) {
        updates.industry_normalized = normalized
        updates.industry_raw = industry
        stats.industryNormalized++
        if (normalized.startsWith(UNCATEGORIZED_PREFIX)) {
          stats.uncategorizedIndustries.add(industry)
        }
      }
    }

    // Use case: same treatment as industry.
    const useCase = user.$properties.useCase
    if (useCase) {
      const normalized = normalizeUseCase(useCase)
      if (normalized !== useCase) {
        updates.useCase_normalized = normalized
        updates.useCase_raw = useCase
        stats.useCaseNormalized++
        if (normalized.startsWith(UNCATEGORIZED_PREFIX)) {
          stats.uncategorizedUseCases.add(useCase)
        }
      }
    }

    // In production, this would make API calls to update user properties.
    if (Object.keys(updates).length > 0) {
      console.log(`Would update user ${user.$distinct_id}:`, updates)
    }
  }

  return stats
}

/**
 * Generate sample data for exercising the normalization rules.
 *
 * @returns Eight hand-written profiles covering a spread of industries and
 *   use cases, including one that is expected to stay unrecognized.
 */
function generateSampleData(): MixpanelUser[] {
  return [
    {
      $distinct_id: 'user1',
      $properties: {
        industry: 'Film and television production',
        useCase: 'Creating concept art for movies'
      }
    },
    {
      $distinct_id: 'user2',
      $properties: {
        industry: 'Marketing & Social Media',
        useCase: 'YouTube thumbnail generation'
      }
    },
    {
      $distinct_id: 'user3',
      $properties: {
        industry: 'Software Development',
        useCase: 'Product mockup creation'
      }
    },
    {
      $distinct_id: 'user4',
      $properties: {
        industry: 'Indie Game Studio',
        useCase: 'Game asset generation'
      }
    },
    {
      $distinct_id: 'user5',
      $properties: {
        industry: 'Architecture firm',
        useCase: 'Building visualization'
      }
    },
    {
      $distinct_id: 'user6',
      $properties: {
        industry: 'Custom Jewelry Design',
        useCase: 'Product photography'
      }
    },
    {
      $distinct_id: 'user7',
      $properties: {
        industry: 'Medical Research',
        useCase: 'Scientific visualization'
      }
    },
    {
      $distinct_id: 'user8',
      $properties: {
        industry: 'Unknown Creative Field',
        useCase: 'Personal art projects'
      }
    }
  ]
}

/**
 * Print the outline of the real migration.
 *
 * A production implementation would use the Mixpanel Data Management API.
 * This function performs no API calls; the snippet in its body is an
 * illustrative sketch, not an actual implementation.
 */
async function productionMigration(): Promise<void> {
  console.log('🔧 Production Migration Process:')
  console.log('1. Export user profiles via Mixpanel Data Management API')
  console.log('2. Apply normalization to industry and useCase fields')
  console.log(
    '3. Create new properties: industry_normalized, useCase_normalized'
  )
  console.log('4. Preserve original values as: industry_raw, useCase_raw')
  console.log('5. Batch update user profiles')
  console.log('6. Generate uncategorized response report for review')

  /*
  Example API calls:

  // 1. Export users
  const users = await mixpanel.people.query({
    where: 'properties["industry"] != null or properties["useCase"] != null'
  })

  // 2. Process and update
  for (const user of users) {
    const normalizedData = normalizeSurveyResponses(user.properties)
    await mixpanel.people.set(user.distinct_id, normalizedData)
  }
  */
}

/**
 * Main migration runner: simulates the migration on the sample data, prints
 * the resulting statistics, then prints the production process outline.
 */
function main(): void {
  console.log('📊 Survey Data Migration Utility')
  console.log('================================\n')

  // Run simulation with sample data
  console.log('🧪 Running simulation with sample data...\n')
  const sampleUsers = generateSampleData()
  const stats = simulateMigration(sampleUsers)

  // Display results
  console.log('📈 Migration Results:')
  console.log(`Total users processed: ${stats.totalUsers}`)
  console.log(`Industry fields normalized: ${stats.industryNormalized}`)
  console.log(`Use case fields normalized: ${stats.useCaseNormalized}`)

  if (stats.uncategorizedIndustries.size > 0) {
    console.log('\n❓ Uncategorized Industries (need review):')
    stats.uncategorizedIndustries.forEach((industry) => {
      console.log(` • ${industry}`)
    })
  }

  if (stats.uncategorizedUseCases.size > 0) {
    console.log('\n❓ Uncategorized Use Cases (need review):')
    stats.uncategorizedUseCases.forEach((useCase) => {
      console.log(` • ${useCase}`)
    })
  }

  console.log('\n' + '='.repeat(50))
  // Fire-and-forget: the outline printer only logs and has nothing to await.
  void productionMigration()
}

// Run if called directly
if (require.main === module) {
  main()
}

export { simulateMigration, generateSampleData }
export type { MigrationStats }
/**
 * Survey Response Normalization Utilities
 *
 * Keyword-based categorization that maps free-text survey responses onto a
 * fixed set of categories for better analytics breakdowns.
 *
 * Each normalizer walks an ordered rule table and returns the category of the
 * first rule whose pattern matches the lower-cased, trimmed input, so rule
 * order decides ties. Short keywords (e.g. "it", "ai", "ar", "vr", "art") are
 * anchored with word boundaries; unanchored they match inside unrelated words
 * ("hospITal", "retAIl", "healthcARe", "stART") and misfile the response.
 */

/** A category label paired with the keyword pattern that selects it. */
type CategoryRule = readonly [string, RegExp]

/** Category returned for empty, non-string, or explicitly "none"-like input. */
const FALLBACK_CATEGORY = 'Other / Undefined'

/** Prefix for responses that match no rule; the raw text is appended. */
const UNCATEGORIZED_PREFIX = 'Uncategorized: '

/**
 * Industry rules, most common category first. The user counts in the
 * comments are the approximate figures recorded when the categories were
 * derived from ~9,000 existing responses.
 */
const INDUSTRY_RULES: readonly CategoryRule[] = [
  // ~2,885 users. "story" is anchored so that "history" does not match.
  [
    'Film / TV / Animation',
    /film|\btv\b|animation|\bstory|anime|video|cinematography|visual effects|vfx|movie|cinema/
  ],
  // ~1,340 users
  [
    'Marketing / Advertising / Social Media',
    /marketing|advertising|youtube|tiktok|social media|content creation|influencer|brand|promotion/
  ],
  // ~1,100 users. Input is lower-cased, so the anchored "it" also matches the
  // pronoun; that is an accepted limitation of keyword matching.
  [
    'Software / IT / AI',
    /software|\bit\b|\b(?:gen)?ai(?:gc)?\b|developer|consulting|engineering|tech|programmer|data science|machine learning/
  ],
  // ~1,050 users
  [
    'Product & Industrial Design',
    /product.?design|industrial|manufacturing|3d rendering|product visualization|mechanical|automotive/
  ],
  // ~780 users
  [
    'Fine Art / Contemporary Art',
    /fine.?art|\bart(?:s|works?)?\b|illustration|contemporary|artist|painting|drawing|sculpture|gallery/
  ],
  // ~640 users
  [
    'Education / Research',
    /education|student|teacher|research|learning|university|school|academic|professor/
  ],
  // ~420 users. "cad" is anchored so that "academy" / "decade" do not match.
  [
    'Architecture / Engineering / Construction',
    /architecture|construction|engineering|civil|\b(?:auto)?cad\b|building|structural|landscape/
  ],
  // ~410 users
  [
    'Gaming / Interactive Media',
    /gaming|game dev|roblox|interactive|virtual world|\bvr\b|\bar\b|metaverse|simulation/
  ],
  // ~70 users
  [
    'Photography / Videography',
    /photography|photo|videography|camera|image|portrait|wedding|commercial photo/
  ],
  // ~25 users
  [
    'Fashion / Beauty / Retail',
    /fashion|beauty|jewelry|retail|style|clothing|cosmetics|makeup/
  ],
  // ~25 users
  [
    'Music / Performing Arts',
    /music|\bvj\b|dance|projection mapping|audio visual|concert|performance|theater/
  ],
  // ~30 users
  [
    'Healthcare / Medical / Life Science',
    /healthcare|medical|doctor|biotech|life science|pharmaceutical|clinical|hospital/
  ],
  // ~15 users
  [
    'E-commerce / Print-on-Demand / Business',
    /ecommerce|print on demand|shop|business|commercial|startup|entrepreneur|sales/
  ],
  // ~15 users
  [
    'Nonprofit / Government / Public Sector',
    /501c3|\bngos?\b|government|public service|policy|nonprofit|charity|civic/
  ],
  // ~10 users
  ['Adult / NSFW', /nsfw|adult|erotic|explicit|xxx|porn/],
  // Common ways of saying "nothing in particular".
  [
    FALLBACK_CATEGORY,
    /other|none|undefined|unknown|n\/a|not applicable|^-$|^$|misc/
  ]
]

/** Use case rules, evaluated in order. */
const USE_CASE_RULES: readonly CategoryRule[] = [
  [
    'Content Creation & Marketing',
    /content creation|social media|marketing|advertising|youtube|tiktok|instagram|thumbnails/
  ],
  [
    'Art & Illustration',
    /\bart(?:s|works?)?\b|artist|illustration|drawing|painting|concept art|character design|digital art/
  ],
  [
    'Product Visualization & Design',
    /product|visualization|design|prototype|mockup|3d rendering|industrial design/
  ],
  [
    'Film & Video Production',
    /film|video|movie|animation|vfx|visual effects|storyboard|cinematography/
  ],
  // "vr" / "ar" are anchored so that "research" and "learning" do not match.
  [
    'Gaming & Interactive Media',
    /game|gaming|interactive|\bvr\b|\bar\b|virtual|simulation|metaverse/
  ],
  [
    'Architecture & Construction',
    /architecture|building|construction|interior design|landscape|real estate/
  ],
  [
    'Education & Training',
    /education|training|learning|teaching|tutorial|course|academic/
  ],
  [
    'Research & Development',
    /research|development|experiment|prototype|testing|analysis|study/
  ],
  // "fun" is anchored so that "fundraising" / "function" do not match.
  [
    'Personal & Hobby',
    /personal|hobby|\bfun\b|experiment|learning|curiosity|explore/
  ],
  [
    'Photography & Image Processing',
    /photography|photo|image|portrait|editing|enhancement|restoration/
  ],
  [
    FALLBACK_CATEGORY,
    /other|none|undefined|unknown|n\/a|not applicable|^-$|^$|misc/
  ]
]

/**
 * Return the category of the first rule matching `raw`.
 *
 * @param raw Free-text response as entered by the user.
 * @param rules Ordered rule table to evaluate.
 * @returns The matched category; 'Other / Undefined' for empty or non-string
 *   input; otherwise `Uncategorized: <raw>` with the original text preserved
 *   (untrimmed, original casing) so it can be reviewed later.
 */
function categorize(raw: string, rules: readonly CategoryRule[]): string {
  // The typeof check guards callers that bypass the type system.
  if (!raw || typeof raw !== 'string') {
    return FALLBACK_CATEGORY
  }

  const text = raw.toLowerCase().trim()
  for (const [category, pattern] of rules) {
    if (pattern.test(text)) {
      return category
    }
  }

  return `${UNCATEGORIZED_PREFIX}${raw}`
}

/**
 * Normalize an industry response into a standardized category.
 *
 * @param rawIndustry Free-text industry answer.
 * @returns A category label, 'Other / Undefined', or
 *   `Uncategorized: <rawIndustry>` when no rule matches.
 */
export function normalizeIndustry(rawIndustry: string): string {
  return categorize(rawIndustry, INDUSTRY_RULES)
}

/**
 * Normalize a use case response into a standardized category.
 *
 * @param rawUseCase Free-text use case answer.
 * @returns A category label, 'Other / Undefined', or
 *   `Uncategorized: <rawUseCase>` when no rule matches.
 */
export function normalizeUseCase(rawUseCase: string): string {
  return categorize(rawUseCase, USE_CASE_RULES)
}

/**
 * Apply normalization to a set of survey responses.
 *
 * Returns a shallow copy of `responses`; the input object is not mutated.
 * For each of `industry` / `useCase` that is present and non-empty, the copy
 * gains `<field>_normalized` (the category) and `<field>_raw` (the original
 * text). All other keys are passed through untouched.
 *
 * The index signature stays `any` on purpose: it keeps the parameter
 * assignable from interface-typed response objects at existing call sites.
 *
 * @param responses Survey answers keyed by question id.
 * @returns The copy with the normalized / raw companion fields added.
 */
export function normalizeSurveyResponses(responses: {
  industry?: string
  useCase?: string
  [key: string]: any
}) {
  const normalized = { ...responses }

  if (responses.industry) {
    normalized.industry_normalized = normalizeIndustry(responses.industry)
    normalized.industry_raw = responses.industry
  }

  if (responses.useCase) {
    normalized.useCase_normalized = normalizeUseCase(responses.useCase)
    normalized.useCase_raw = responses.useCase
  }

  return normalized
}