From 74887bcd6a1b1a31ecdaed8e2620b3a0ee8756a2 Mon Sep 17 00:00:00 2001 From: wooway777 Date: Fri, 3 Jul 2026 12:00:54 +0800 Subject: [PATCH] chore: format include, src, and python --- include/infinicore/analyzer.hpp | 6 +- .../infinicore/analyzer/intent_generator.hpp | 16 +- include/infinicore/analyzer/op_trace.hpp | 6 +- include/infinicore/analyzer/op_type.hpp | 138 ++++++++++++------ .../infinicore/analyzer/op_type_registry.hpp | 42 +++--- .../analyzer/optimization_intent.hpp | 83 +++++++---- .../infinicore/analyzer/resource_sensor.hpp | 36 +++-- include/infinicore/graph/graph.hpp | 48 +++--- include/infinicore/ops/common/dispatcher.hpp | 5 +- .../quantization/base_quantization.hpp | 2 +- include/infinicore/quantization/gptq.hpp | 2 +- .../infinicore/nn/functional/causal_conv1d.py | 8 +- .../nn/functional/chunk_gated_delta_rule.py | 8 +- python/infinicore/nn/functional/layer_norm.py | 6 +- python/infinicore/nn/functional/rms_norm.py | 6 +- python/infinicore/ops/__init__.py | 1 + scripts/install.py | 3 +- scripts/python_test.py | 3 +- setup.py | 20 ++- .../nvidia/generate_kernels.py | 5 +- src/infiniop/ops/dequantize_awq/operator.cc | 2 +- .../ops/flash_attention/ninetoothed/build.py | 9 +- .../ops/kv_caching/ninetoothed/build.py | 3 +- .../ops/kv_caching/ninetoothed/kv_caching.py | 1 + src/infiniop/ops/swiglu/ninetoothed/build.py | 3 +- src/infiniop/ops/swiglu/ninetoothed/swiglu.py | 1 - 26 files changed, 275 insertions(+), 188 deletions(-) diff --git a/include/infinicore/analyzer.hpp b/include/infinicore/analyzer.hpp index 86736437f5..81e9ab1a9d 100644 --- a/include/infinicore/analyzer.hpp +++ b/include/infinicore/analyzer.hpp @@ -2,10 +2,10 @@ // Convenience header — includes all analyzer components. -#include "analyzer/op_type.hpp" +#include "analyzer/intent_generator.hpp" +#include "analyzer/mutual_awareness_analyzer.hpp" #include "analyzer/op_trace.hpp" +#include "analyzer/op_type.hpp" #include "analyzer/optimization_intent.hpp" #include "analyzer/phase_detector.hpp" #include "analyzer/resource_sensor.hpp" -#include "analyzer/intent_generator.hpp" -#include "analyzer/mutual_awareness_analyzer.hpp" diff --git a/include/infinicore/analyzer/intent_generator.hpp b/include/infinicore/analyzer/intent_generator.hpp index f73c9e880c..b166e4c1eb 100644 --- a/include/infinicore/analyzer/intent_generator.hpp +++ b/include/infinicore/analyzer/intent_generator.hpp @@ -1,7 +1,7 @@ #pragma once -#include "optimization_intent.hpp" #include "op_trace.hpp" +#include "optimization_intent.hpp" #include #include @@ -72,7 +72,9 @@ class IntentGenerator { PhaseType phase, const std::vector &window) const { - if (window.empty()) return 0.0f; + if (window.empty()) { + return 0.0f; + } size_t heavy_compute_ops = 0; for (auto &e : window) { @@ -201,7 +203,7 @@ class IntentGenerator { // Fusion is beneficial for bandwidth-bound phases (reduce memory traffic) hint.prefer_fused_ops = (bottleneck == BottleneckType::BANDWIDTH_BOUND) - || phase == PhaseType::DECODE; + || phase == PhaseType::DECODE; // In-place when memory is tight hint.prefer_in_place = (bottleneck == BottleneckType::MEMORY_BOUND); @@ -218,8 +220,8 @@ class IntentGenerator { // Async comm overlap for multi-device and communication phases hint.prefer_async_comm = (device_intents.size() > 1) - && (phase == PhaseType::GEMM_MLP_DENSE - || phase == PhaseType::COMMUNICATION); + && (phase == PhaseType::GEMM_MLP_DENSE + || phase == PhaseType::COMMUNICATION); return hint; } @@ -254,7 +256,9 @@ class IntentGenerator { default: break; } - if (match) matching++; + if (match) { + matching++; + } } return static_cast(matching) / static_cast(window.size()); diff --git a/include/infinicore/analyzer/op_trace.hpp b/include/infinicore/analyzer/op_trace.hpp index 49f9c4c8b3..0b3018c289 100644 --- a/include/infinicore/analyzer/op_trace.hpp +++ b/include/infinicore/analyzer/op_trace.hpp @@ -144,9 +144,9 @@ OpTraceRing &getGlobalOpTrace(); /// This is the function called from the INFINICORE_GRAPH_OP_RECORD_OR_RUN /// macro hook (when ENABLE_MUTUAL_AWARENESS is defined). inline void traceOp(OpType op_type, - const size_t *shape, size_t ndim, - uint8_t dtype, - uint8_t device_type, int8_t device_id) { + const size_t *shape, size_t ndim, + uint8_t dtype, + uint8_t device_type, int8_t device_id) { OpTraceEntry entry; entry.op_type = op_type; entry.setShape(shape, ndim); diff --git a/include/infinicore/analyzer/op_type.hpp b/include/infinicore/analyzer/op_type.hpp index 21444c2e41..cbb3ac1025 100644 --- a/include/infinicore/analyzer/op_type.hpp +++ b/include/infinicore/analyzer/op_type.hpp @@ -84,52 +84,98 @@ enum class OpType : uint8_t { /// Convert OpType to human-readable string. inline const char *opTypeToString(OpType type) { switch (type) { - case OpType::ATTENTION: return "attention"; - case OpType::FLASH_ATTENTION: return "flash_attention"; - case OpType::CAUSAL_SOFTMAX: return "causal_softmax"; - case OpType::PAGED_ATTENTION: return "paged_attention"; - case OpType::PAGED_ATTENTION_PREFILL: return "paged_attention_prefill"; - case OpType::MHA_KVCACHE: return "mha_kvcache"; - case OpType::MHA_VARLEN: return "mha_varlen"; - case OpType::SOFTMAX: return "softmax"; - case OpType::GEMM: return "gemm"; - case OpType::LINEAR: return "linear"; - case OpType::MATMUL: return "matmul"; - case OpType::INT8_GEMM: return "int8_gemm"; - case OpType::SCALED_MM_I8: return "scaled_mm_i8"; - case OpType::SILU: return "silu"; - case OpType::SILU_AND_MUL: return "silu_and_mul"; - case OpType::GELU: return "gelu"; - case OpType::SWIGLU: return "swiglu"; - case OpType::RELU: return "relu"; - case OpType::SIGMOID: return "sigmoid"; - case OpType::RMS_NORM: return "rms_norm"; - case OpType::ADD_RMS_NORM: return "add_rms_norm"; - case OpType::LAYER_NORM: return "layer_norm"; - case OpType::EMBEDDING: return "embedding"; - case OpType::ROPE: return "rope"; - case OpType::KV_CACHING: return "kv_caching"; - case OpType::PAGED_CACHING: return "paged_caching"; - case OpType::ADD: return "add"; - case OpType::MUL: return "mul"; - case OpType::SUB: return "sub"; - case OpType::SUM: return "sum"; - case OpType::RECIPROCAL: return "reciprocal"; - case OpType::PER_TENSOR_QUANT_I8: return "per_tensor_quant_i8"; - case OpType::PER_TENSOR_DEQUANT_I8: return "per_tensor_dequant_i8"; - case OpType::PER_CHANNEL_QUANT_I8: return "per_channel_quant_i8"; - case OpType::DEQUANTIZE_AWQ: return "dequantize_awq"; - case OpType::DEQUANTIZE_GPTQ: return "dequantize_gptq"; - case OpType::RANDOM_SAMPLE: return "random_sample"; - case OpType::TOPK: return "topk"; - case OpType::TOPK_ROUTER: return "topk_router"; - case OpType::TOPK_SOFTMAX: return "topk_softmax"; - case OpType::ALLREDUCE: return "allreduce"; - case OpType::REARRANGE: return "rearrange"; - case OpType::ONES: return "ones"; - case OpType::ZEROS: return "zeros"; - case OpType::TAKE: return "take"; - default: return "unknown"; + case OpType::ATTENTION: + return "attention"; + case OpType::FLASH_ATTENTION: + return "flash_attention"; + case OpType::CAUSAL_SOFTMAX: + return "causal_softmax"; + case OpType::PAGED_ATTENTION: + return "paged_attention"; + case OpType::PAGED_ATTENTION_PREFILL: + return "paged_attention_prefill"; + case OpType::MHA_KVCACHE: + return "mha_kvcache"; + case OpType::MHA_VARLEN: + return "mha_varlen"; + case OpType::SOFTMAX: + return "softmax"; + case OpType::GEMM: + return "gemm"; + case OpType::LINEAR: + return "linear"; + case OpType::MATMUL: + return "matmul"; + case OpType::INT8_GEMM: + return "int8_gemm"; + case OpType::SCALED_MM_I8: + return "scaled_mm_i8"; + case OpType::SILU: + return "silu"; + case OpType::SILU_AND_MUL: + return "silu_and_mul"; + case OpType::GELU: + return "gelu"; + case OpType::SWIGLU: + return "swiglu"; + case OpType::RELU: + return "relu"; + case OpType::SIGMOID: + return "sigmoid"; + case OpType::RMS_NORM: + return "rms_norm"; + case OpType::ADD_RMS_NORM: + return "add_rms_norm"; + case OpType::LAYER_NORM: + return "layer_norm"; + case OpType::EMBEDDING: + return "embedding"; + case OpType::ROPE: + return "rope"; + case OpType::KV_CACHING: + return "kv_caching"; + case OpType::PAGED_CACHING: + return "paged_caching"; + case OpType::ADD: + return "add"; + case OpType::MUL: + return "mul"; + case OpType::SUB: + return "sub"; + case OpType::SUM: + return "sum"; + case OpType::RECIPROCAL: + return "reciprocal"; + case OpType::PER_TENSOR_QUANT_I8: + return "per_tensor_quant_i8"; + case OpType::PER_TENSOR_DEQUANT_I8: + return "per_tensor_dequant_i8"; + case OpType::PER_CHANNEL_QUANT_I8: + return "per_channel_quant_i8"; + case OpType::DEQUANTIZE_AWQ: + return "dequantize_awq"; + case OpType::DEQUANTIZE_GPTQ: + return "dequantize_gptq"; + case OpType::RANDOM_SAMPLE: + return "random_sample"; + case OpType::TOPK: + return "topk"; + case OpType::TOPK_ROUTER: + return "topk_router"; + case OpType::TOPK_SOFTMAX: + return "topk_softmax"; + case OpType::ALLREDUCE: + return "allreduce"; + case OpType::REARRANGE: + return "rearrange"; + case OpType::ONES: + return "ones"; + case OpType::ZEROS: + return "zeros"; + case OpType::TAKE: + return "take"; + default: + return "unknown"; } } diff --git a/include/infinicore/analyzer/op_type_registry.hpp b/include/infinicore/analyzer/op_type_registry.hpp index 8d7bc0135b..4445f3a514 100644 --- a/include/infinicore/analyzer/op_type_registry.hpp +++ b/include/infinicore/analyzer/op_type_registry.hpp @@ -11,36 +11,36 @@ namespace infinicore::analyzer { inline OpType opTypeFromName(const char *name) { static const std::unordered_map registry = { // Attention - {"FlashAttention", OpType::FLASH_ATTENTION}, - {"CausalSoftmax", OpType::CAUSAL_SOFTMAX}, - {"PagedAttention", OpType::PAGED_ATTENTION}, - {"MhaKVCache", OpType::MHA_KVCACHE}, + {"FlashAttention", OpType::FLASH_ATTENTION}, + {"CausalSoftmax", OpType::CAUSAL_SOFTMAX}, + {"PagedAttention", OpType::PAGED_ATTENTION}, + {"MhaKVCache", OpType::MHA_KVCACHE}, {"MultiheadAttentionVarlen", OpType::MHA_VARLEN}, // GEMM / MLP - {"Gemm", OpType::GEMM}, - {"I8Gemm", OpType::SCALED_MM_I8}, + {"Gemm", OpType::GEMM}, + {"I8Gemm", OpType::SCALED_MM_I8}, // Activation - {"SiluAndMul", OpType::SILU_AND_MUL}, - {"SwiGLU", OpType::SWIGLU}, + {"SiluAndMul", OpType::SILU_AND_MUL}, + {"SwiGLU", OpType::SWIGLU}, // Norm - {"RMSNorm", OpType::RMS_NORM}, - {"AddRMSNorm", OpType::ADD_RMS_NORM}, + {"RMSNorm", OpType::RMS_NORM}, + {"AddRMSNorm", OpType::ADD_RMS_NORM}, // Embedding / Positional - {"Embedding", OpType::EMBEDDING}, - {"RoPE", OpType::ROPE}, + {"Embedding", OpType::EMBEDDING}, + {"RoPE", OpType::ROPE}, // KV Cache - {"KVCaching", OpType::KV_CACHING}, - {"PagedCaching", OpType::PAGED_CACHING}, + {"KVCaching", OpType::KV_CACHING}, + {"PagedCaching", OpType::PAGED_CACHING}, // Elementwise - {"Add", OpType::ADD}, - {"Mul", OpType::MUL}, + {"Add", OpType::ADD}, + {"Mul", OpType::MUL}, // Quantization - {"PerTensorQuantI8", OpType::PER_TENSOR_QUANT_I8}, - {"PerTensorDequantI8", OpType::PER_TENSOR_DEQUANT_I8}, - {"PerChannelQuantI8", OpType::PER_CHANNEL_QUANT_I8}, - {"DequantizeAWQ", OpType::DEQUANTIZE_AWQ}, + {"PerTensorQuantI8", OpType::PER_TENSOR_QUANT_I8}, + {"PerTensorDequantI8", OpType::PER_TENSOR_DEQUANT_I8}, + {"PerChannelQuantI8", OpType::PER_CHANNEL_QUANT_I8}, + {"DequantizeAWQ", OpType::DEQUANTIZE_AWQ}, // Misc - {"Rearrange", OpType::REARRANGE}, + {"Rearrange", OpType::REARRANGE}, }; auto it = registry.find(name); return it != registry.end() ? it->second : OpType::UNKNOWN; diff --git a/include/infinicore/analyzer/optimization_intent.hpp b/include/infinicore/analyzer/optimization_intent.hpp index 9b423a281c..19d6c868ce 100644 --- a/include/infinicore/analyzer/optimization_intent.hpp +++ b/include/infinicore/analyzer/optimization_intent.hpp @@ -15,25 +15,33 @@ namespace infinicore::analyzer { /// Recognized task phase types in LLM inference. enum class PhaseType : uint8_t { UNKNOWN = 0, - PREFILL, // Long-sequence prefill phase - DECODE, // Autoregressive decode phase - ATTENTION_DENSE, // Attention-dominated segment - GEMM_MLP_DENSE, // GEMM/MLP-dominated segment - MOE_ROUTING, // MoE routing segment (future) - KV_CACHE, // KV cache manipulation segment - COMMUNICATION, // Communication-dominated segment (future) + PREFILL, // Long-sequence prefill phase + DECODE, // Autoregressive decode phase + ATTENTION_DENSE, // Attention-dominated segment + GEMM_MLP_DENSE, // GEMM/MLP-dominated segment + MOE_ROUTING, // MoE routing segment (future) + KV_CACHE, // KV cache manipulation segment + COMMUNICATION, // Communication-dominated segment (future) }; inline const char *phaseTypeToString(PhaseType phase) { switch (phase) { - case PhaseType::PREFILL: return "prefill"; - case PhaseType::DECODE: return "decode"; - case PhaseType::ATTENTION_DENSE: return "attention_dense"; - case PhaseType::GEMM_MLP_DENSE: return "gemm_mlp_dense"; - case PhaseType::MOE_ROUTING: return "moe_routing"; - case PhaseType::KV_CACHE: return "kv_cache"; - case PhaseType::COMMUNICATION: return "communication"; - default: return "unknown"; + case PhaseType::PREFILL: + return "prefill"; + case PhaseType::DECODE: + return "decode"; + case PhaseType::ATTENTION_DENSE: + return "attention_dense"; + case PhaseType::GEMM_MLP_DENSE: + return "gemm_mlp_dense"; + case PhaseType::MOE_ROUTING: + return "moe_routing"; + case PhaseType::KV_CACHE: + return "kv_cache"; + case PhaseType::COMMUNICATION: + return "communication"; + default: + return "unknown"; } } @@ -48,12 +56,18 @@ enum class BottleneckType : uint8_t { inline const char *bottleneckTypeToString(BottleneckType bt) { switch (bt) { - case BottleneckType::COMPUTE_BOUND: return "compute_bound"; - case BottleneckType::MEMORY_BOUND: return "memory_bound"; - case BottleneckType::BANDWIDTH_BOUND: return "bandwidth_bound"; - case BottleneckType::COMMUNICATION_BOUND: return "communication_bound"; - case BottleneckType::BALANCED: return "balanced"; - default: return "unknown"; + case BottleneckType::COMPUTE_BOUND: + return "compute_bound"; + case BottleneckType::MEMORY_BOUND: + return "memory_bound"; + case BottleneckType::BANDWIDTH_BOUND: + return "bandwidth_bound"; + case BottleneckType::COMMUNICATION_BOUND: + return "communication_bound"; + case BottleneckType::BALANCED: + return "balanced"; + default: + return "unknown"; } } @@ -67,11 +81,16 @@ enum class OptimizationGoal : uint8_t { inline const char *optimizationGoalToString(OptimizationGoal goal) { switch (goal) { - case OptimizationGoal::LATENCY_FIRST: return "latency_first"; - case OptimizationGoal::THROUGHPUT_FIRST: return "throughput_first"; - case OptimizationGoal::MEMORY_SAFE: return "memory_safe"; - case OptimizationGoal::STABILITY_FIRST: return "stability_first"; - default: return "unknown"; + case OptimizationGoal::LATENCY_FIRST: + return "latency_first"; + case OptimizationGoal::THROUGHPUT_FIRST: + return "throughput_first"; + case OptimizationGoal::MEMORY_SAFE: + return "memory_safe"; + case OptimizationGoal::STABILITY_FIRST: + return "stability_first"; + default: + return "unknown"; } } @@ -85,10 +104,10 @@ inline const char *optimizationGoalToString(OptimizationGoal goal) { /// Strategy hints generated by the analyzer. struct StrategyHint { - bool prefer_fused_ops = false; // Suggest op fusion? - bool prefer_in_place = false; // Suggest in-place computation? - bool prefer_recomputation = false; // Suggest recompute to save memory? - bool prefer_async_comm = false; // Suggest async comm overlap? + bool prefer_fused_ops = false; // Suggest op fusion? + bool prefer_in_place = false; // Suggest in-place computation? + bool prefer_recomputation = false; // Suggest recompute to save memory? + bool prefer_async_comm = false; // Suggest async comm overlap? }; /// Global semantic-level intent — describes the task's overall @@ -114,8 +133,8 @@ struct GlobalSemanticIntent { // --- Production metadata --- uint64_t timestamp_ns = 0; - uint32_t op_window_start = 0; // Start index of analyzed op window - uint32_t op_window_end = 0; // End index (exclusive) of analyzed op window + uint32_t op_window_start = 0; // Start index of analyzed op window + uint32_t op_window_end = 0; // End index (exclusive) of analyzed op window }; /// Per-device local intent — resource-aware refinement diff --git a/include/infinicore/analyzer/resource_sensor.hpp b/include/infinicore/analyzer/resource_sensor.hpp index a5f18b234e..72c871080b 100644 --- a/include/infinicore/analyzer/resource_sensor.hpp +++ b/include/infinicore/analyzer/resource_sensor.hpp @@ -11,15 +11,15 @@ namespace infinicore::analyzer { /// Memory statistics from the allocator. struct MemoryStats { - size_t allocated_bytes = 0; // Currently allocated bytes - size_t total_capacity = 0; // Total pool capacity in bytes - size_t peak_allocated = 0; // Peak allocation since last reset - size_t allocation_count = 0; // Number of active allocations + size_t allocated_bytes = 0; // Currently allocated bytes + size_t total_capacity = 0; // Total pool capacity in bytes + size_t peak_allocated = 0; // Peak allocation since last reset + size_t allocation_count = 0; // Number of active allocations float usageRatio() const { return total_capacity > 0 - ? static_cast(allocated_bytes) / static_cast(total_capacity) - : 0.0f; + ? static_cast(allocated_bytes) / static_cast(total_capacity) + : 0.0f; } }; @@ -70,10 +70,18 @@ struct DeviceResourceSnapshot { float resourceConfidence() const { float confidence = 0.0f; - if (has_memory_capacity) confidence += 0.35f; - if (has_compute_utilization) confidence += 0.25f; - if (has_memory_bandwidth_utilization) confidence += 0.25f; - if (has_communication) confidence += 0.15f; + if (has_memory_capacity) { + confidence += 0.35f; + } + if (has_compute_utilization) { + confidence += 0.25f; + } + if (has_memory_bandwidth_utilization) { + confidence += 0.25f; + } + if (has_communication) { + confidence += 0.15f; + } return std::min(confidence, 1.0f); } }; @@ -94,8 +102,8 @@ class ResourceSensor { intent.device_id = snapshot.device_id; intent.memory_usage_ratio = snapshot.memoryUsageRatio(); intent.memory_available_bytes = snapshot.free_bytes > 0 - ? snapshot.free_bytes - : ((snapshot.total_bytes >= snapshot.used_bytes) ? (snapshot.total_bytes - snapshot.used_bytes) : 0); + ? snapshot.free_bytes + : ((snapshot.total_bytes >= snapshot.used_bytes) ? (snapshot.total_bytes - snapshot.used_bytes) : 0); intent.compute_utilization = snapshot.compute_utilization; intent.memory_bandwidth_utilization = snapshot.memory_bandwidth_utilization; intent.communication_time_ratio = snapshot.communication_time_ratio; @@ -149,8 +157,8 @@ class ResourceSensor { snapshot.device_type = device_type; snapshot.has_memory_capacity = stats.total_capacity > 0; snapshot.free_bytes = stats.total_capacity >= stats.allocated_bytes - ? (stats.total_capacity - stats.allocated_bytes) - : 0; + ? (stats.total_capacity - stats.allocated_bytes) + : 0; snapshot.total_bytes = stats.total_capacity; snapshot.used_bytes = stats.allocated_bytes; snapshot.reserved_bytes = stats.total_capacity; diff --git a/include/infinicore/graph/graph.hpp b/include/infinicore/graph/graph.hpp index f477bcc67c..be368f92d7 100644 --- a/include/infinicore/graph/graph.hpp +++ b/include/infinicore/graph/graph.hpp @@ -93,35 +93,35 @@ class Graph { // Trace one op invocation into the global ring. Op type is resolved by // stringified class name through `opTypeFromName`, so new graph ops are // automatically discoverable without modifying the op header. -#define _INFINICORE_TRACE_OP(__OP_NAME__, __TRACE_TENSOR__) \ - do { \ - auto __op_type = ::infinicore::analyzer::opTypeFromName(#__OP_NAME__);\ - auto &&__trace_tensor = (__TRACE_TENSOR__); \ - if (__trace_tensor) { \ - const auto &__trace_shape = __trace_tensor->shape(); \ - const auto __trace_device = __trace_tensor->device(); \ - ::infinicore::analyzer::traceOp( \ - __op_type, \ - __trace_shape.data(), \ - __trace_shape.size(), \ - static_cast(__trace_tensor->dtype()), \ - static_cast(__trace_device.getType()), \ - static_cast(__trace_device.getIndex())); \ - } else { \ - ::infinicore::analyzer::traceOp(__op_type, nullptr, 0, 0, 0, -1);\ - } \ +#define _INFINICORE_TRACE_OP(__OP_NAME__, __TRACE_TENSOR__) \ + do { \ + auto __op_type = ::infinicore::analyzer::opTypeFromName(#__OP_NAME__); \ + auto &&__trace_tensor = (__TRACE_TENSOR__); \ + if (__trace_tensor) { \ + const auto &__trace_shape = __trace_tensor->shape(); \ + const auto __trace_device = __trace_tensor->device(); \ + ::infinicore::analyzer::traceOp( \ + __op_type, \ + __trace_shape.data(), \ + __trace_shape.size(), \ + static_cast(__trace_tensor->dtype()), \ + static_cast(__trace_device.getType()), \ + static_cast(__trace_device.getIndex())); \ + } else { \ + ::infinicore::analyzer::traceOp(__op_type, nullptr, 0, 0, 0, -1); \ + } \ } while (0) #else #define _INFINICORE_TRACE_OP(__OP_NAME__, __TRACE_TENSOR__) ((void)0) #endif -#define INFINICORE_GRAPH_OP_RECORD_OR_RUN(__OP_NAME__, ...) \ - auto ___op = std::make_shared<__OP_NAME__>(__VA_ARGS__); \ - if (context::isGraphRecording()) { \ - context::addGraphOperator(___op); \ - } else { \ - ___op->run(); \ - } \ +#define INFINICORE_GRAPH_OP_RECORD_OR_RUN(__OP_NAME__, ...) \ + auto ___op = std::make_shared<__OP_NAME__>(__VA_ARGS__); \ + if (context::isGraphRecording()) { \ + context::addGraphOperator(___op); \ + } else { \ + ___op->run(); \ + } \ _INFINICORE_TRACE_OP(__OP_NAME__, INFINICORE_DETAIL_FIRST_ARG(__VA_ARGS__)); #define INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(__OP_NAME__, __PLAN_F__, __RUN_F__, __CLEANUP_F__) \ diff --git a/include/infinicore/ops/common/dispatcher.hpp b/include/infinicore/ops/common/dispatcher.hpp index c5a0d5248c..9c5cac1793 100644 --- a/include/infinicore/ops/common/dispatcher.hpp +++ b/include/infinicore/ops/common/dispatcher.hpp @@ -66,11 +66,12 @@ class OpDispatcher { private: static std::size_t goalKey(Device::Type device_type, analyzer::OptimizationGoal goal) { return static_cast(device_type) * kGoalCount - + static_cast(goal); + + static_cast(goal); } std::array(Device::Type::COUNT) * kGoalCount> goal_table_{}; + static_cast(Device::Type::COUNT) * kGoalCount> + goal_table_{}; #endif private: diff --git a/include/infinicore/quantization/base_quantization.hpp b/include/infinicore/quantization/base_quantization.hpp index ceff0f54a3..4ee4f7608d 100644 --- a/include/infinicore/quantization/base_quantization.hpp +++ b/include/infinicore/quantization/base_quantization.hpp @@ -6,7 +6,7 @@ namespace infinicore::quantization { class BaseQuantization { // Base class for quantization schemes. Intended to be extended to support various quantization methods. public: - explicit BaseQuantization(const nlohmann::json &quant_config) : quant_config_(quant_config) {}; + explicit BaseQuantization(const nlohmann::json &quant_config) : quant_config_(quant_config){}; virtual ~BaseQuantization() = default; const nlohmann::json &get_config() const { return quant_config_; } diff --git a/include/infinicore/quantization/gptq.hpp b/include/infinicore/quantization/gptq.hpp index c4cd4bb02b..c03416b076 100644 --- a/include/infinicore/quantization/gptq.hpp +++ b/include/infinicore/quantization/gptq.hpp @@ -8,7 +8,7 @@ class GPTQ : public BaseQuantization { // information and support multiple quantization schemes. public: explicit GPTQ(const nlohmann::json &quant_config) - : BaseQuantization(quant_config) {}; + : BaseQuantization(quant_config){}; infinicore::quantization::QuantScheme get_quant_scheme() const override { diff --git a/python/infinicore/nn/functional/causal_conv1d.py b/python/infinicore/nn/functional/causal_conv1d.py index c7699819fe..04116a8586 100644 --- a/python/infinicore/nn/functional/causal_conv1d.py +++ b/python/infinicore/nn/functional/causal_conv1d.py @@ -41,9 +41,11 @@ def causal_conv1d( weight._underlying, None if bias is None else bias._underlying, None if cu_seqlens is None else cu_seqlens._underlying, - None - if initial_state_indices is None - else initial_state_indices._underlying, + ( + None + if initial_state_indices is None + else initial_state_indices._underlying + ), None if final_state_indices is None else final_state_indices._underlying, ) ) diff --git a/python/infinicore/nn/functional/chunk_gated_delta_rule.py b/python/infinicore/nn/functional/chunk_gated_delta_rule.py index 082743faa1..dd87a9f80c 100644 --- a/python/infinicore/nn/functional/chunk_gated_delta_rule.py +++ b/python/infinicore/nn/functional/chunk_gated_delta_rule.py @@ -56,9 +56,11 @@ def chunk_gated_delta_rule( beta._underlying, initial_state._underlying, None if cu_seqlens is None else cu_seqlens._underlying, - None - if initial_state_indices is None - else initial_state_indices._underlying, + ( + None + if initial_state_indices is None + else initial_state_indices._underlying + ), None if final_state_indices is None else final_state_indices._underlying, use_qk_l2norm, chunk_size, diff --git a/python/infinicore/nn/functional/layer_norm.py b/python/infinicore/nn/functional/layer_norm.py index b841fdc076..26a6e6979a 100644 --- a/python/infinicore/nn/functional/layer_norm.py +++ b/python/infinicore/nn/functional/layer_norm.py @@ -15,9 +15,9 @@ def layer_norm( ) -> Tensor: r"""Apply Layer Normalization.""" - assert normalized_shape == weight.shape, ( - "normalized_shape does not match weight.shape." - ) + assert ( + normalized_shape == weight.shape + ), "normalized_shape does not match weight.shape." if out is None: return Tensor( diff --git a/python/infinicore/nn/functional/rms_norm.py b/python/infinicore/nn/functional/rms_norm.py index 84cd82ae12..30fb701d20 100644 --- a/python/infinicore/nn/functional/rms_norm.py +++ b/python/infinicore/nn/functional/rms_norm.py @@ -14,9 +14,9 @@ def rms_norm( ) -> Tensor: r"""Apply Root Mean Square Layer Normalization.""" - assert normalized_shape == weight.shape, ( - "normalized_shape does not match weight.shape." - ) + assert ( + normalized_shape == weight.shape + ), "normalized_shape does not match weight.shape." if out is None: return Tensor(_infinicore.rms_norm(input._underlying, weight._underlying, eps)) diff --git a/python/infinicore/ops/__init__.py b/python/infinicore/ops/__init__.py index e69de29bb2..8b13789179 100644 --- a/python/infinicore/ops/__init__.py +++ b/python/infinicore/ops/__init__.py @@ -0,0 +1 @@ + diff --git a/scripts/install.py b/scripts/install.py index 98a448254e..8c75776a3d 100644 --- a/scripts/install.py +++ b/scripts/install.py @@ -1,7 +1,7 @@ import os import subprocess -import platform import sys + from set_env import ( set_env, set_env_by_config, @@ -10,6 +10,7 @@ PROJECT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) os.chdir(PROJECT_DIR) + def run_cmd(cmd): subprocess.run(cmd, text=True, encoding="utf-8", check=True, shell=True) diff --git a/scripts/python_test.py b/scripts/python_test.py index 4879da32a9..f23443447c 100644 --- a/scripts/python_test.py +++ b/scripts/python_test.py @@ -1,8 +1,9 @@ import os import subprocess -from set_env import set_env import sys +from set_env import set_env + PROJECT_DIR = os.path.abspath( os.path.join(os.path.dirname(__file__), "..", "test", "infiniop") ) diff --git a/setup.py b/setup.py index e79e41f539..479af83750 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,9 @@ import subprocess -from setuptools import setup, find_packages + +from setuptools import find_packages, setup from setuptools.command.build import build + def run_xmake_build(): print("Running xmake build...") subprocess.run(["xmake", "build"], check=True) @@ -9,26 +11,22 @@ def run_xmake_build(): subprocess.run(["xmake", "build", "-y", "_infinicore"], check=True) subprocess.run(["xmake", "install", "_infinicore"], check=True) + class Build(build): def run(self): run_xmake_build() super().run() + setup( # 1. Find main packages and manually add test/framework packages - packages=find_packages(where="python") + [ - "infinicore.test", - "infinicore.test.framework" - ], - + packages=find_packages(where="python") + + ["infinicore.test", "infinicore.test.framework"], # 2. Directory mappings package_dir={ "": "python", # Root package is under python/ directory - "infinicore.test": "test/infinicore" # Intermediate package mapping + "infinicore.test": "test/infinicore", # Intermediate package mapping }, - # 3. Register commands - cmdclass={ - "build": Build - } + cmdclass={"build": Build}, ) diff --git a/src/infiniop/ops/awq_marlin_gemm/nvidia/generate_kernels.py b/src/infiniop/ops/awq_marlin_gemm/nvidia/generate_kernels.py index e05296c52e..ab78d0ce20 100644 --- a/src/infiniop/ops/awq_marlin_gemm/nvidia/generate_kernels.py +++ b/src/infiniop/ops/awq_marlin_gemm/nvidia/generate_kernels.py @@ -32,12 +32,15 @@ """.lstrip() -FILE_HEAD = FILE_HEAD_COMMENT + """ +FILE_HEAD = ( + FILE_HEAD_COMMENT + + """ #include "../marlin/kernel.h" #include "../marlin/marlin_template.h" namespace MARLIN_NAMESPACE_NAME { """ +) TEMPLATE = ( "template __global__ void Marlin<" diff --git a/src/infiniop/ops/dequantize_awq/operator.cc b/src/infiniop/ops/dequantize_awq/operator.cc index 342e3da652..c63aed0fde 100644 --- a/src/infiniop/ops/dequantize_awq/operator.cc +++ b/src/infiniop/ops/dequantize_awq/operator.cc @@ -155,4 +155,4 @@ infiniopDestroyDequantizeAWQDescriptor(infiniopDequantizeAWQDescriptor_t desc) { #undef DELETE } -// #endif \ No newline at end of file +// #endif diff --git a/src/infiniop/ops/flash_attention/ninetoothed/build.py b/src/infiniop/ops/flash_attention/ninetoothed/build.py index 00f96b9c92..ef3235dfd9 100644 --- a/src/infiniop/ops/flash_attention/ninetoothed/build.py +++ b/src/infiniop/ops/flash_attention/ninetoothed/build.py @@ -1,12 +1,11 @@ +import os + import ninetoothed -from . import flash_attention -from .flash_attention import CausalVariant import infiniop.ninetoothed.build -import torch - -import os +from . import flash_attention +from .flash_attention import CausalVariant def build(): diff --git a/src/infiniop/ops/kv_caching/ninetoothed/build.py b/src/infiniop/ops/kv_caching/ninetoothed/build.py index 03481c86b7..f76497434d 100644 --- a/src/infiniop/ops/kv_caching/ninetoothed/build.py +++ b/src/infiniop/ops/kv_caching/ninetoothed/build.py @@ -1,8 +1,9 @@ import ninetoothed -from . import kv_caching import infiniop.ninetoothed.build +from . import kv_caching + def build(): dtype_values = ( diff --git a/src/infiniop/ops/kv_caching/ninetoothed/kv_caching.py b/src/infiniop/ops/kv_caching/ninetoothed/kv_caching.py index dfc5088e95..006dbe3770 100644 --- a/src/infiniop/ops/kv_caching/ninetoothed/kv_caching.py +++ b/src/infiniop/ops/kv_caching/ninetoothed/kv_caching.py @@ -1,4 +1,5 @@ import functools + import ninetoothed from ninetoothed import Tensor diff --git a/src/infiniop/ops/swiglu/ninetoothed/build.py b/src/infiniop/ops/swiglu/ninetoothed/build.py index fa4af6db24..10c82d5385 100644 --- a/src/infiniop/ops/swiglu/ninetoothed/build.py +++ b/src/infiniop/ops/swiglu/ninetoothed/build.py @@ -1,8 +1,9 @@ import ninetoothed -from . import swiglu import infiniop.ninetoothed.build +from . import swiglu + def build(): MAX_NDIM = 5 diff --git a/src/infiniop/ops/swiglu/ninetoothed/swiglu.py b/src/infiniop/ops/swiglu/ninetoothed/swiglu.py index 62074a84bf..2ede5e96a9 100644 --- a/src/infiniop/ops/swiglu/ninetoothed/swiglu.py +++ b/src/infiniop/ops/swiglu/ninetoothed/swiglu.py @@ -2,7 +2,6 @@ import ninetoothed.language as ntl from ninetoothed import Tensor - from ntops.kernels.element_wise import arrangement