Skriller0208 commited on
Commit
68aa6e6
·
verified ·
1 Parent(s): 720eb3e

Upload Makefile

Browse files
Files changed (1) hide show
  1. Makefile +1166 -0
Makefile ADDED
@@ -0,0 +1,1166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Define the default target now so that it is always the first target
2
+ BUILD_TARGETS = \
3
+ main \
4
+ bench \
5
+ quantize \
6
+ server \
7
+ tests/test-c.o
8
+
9
+ # Binaries only useful for tests
10
+ TEST_TARGETS = \
11
+ tests/test-backend-ops
12
+
13
+ # Deprecation aliases
14
+ ifdef WHISPER_CUBLAS
15
+ $(error WHISPER_CUBLAS is removed. Use GGML_CUDA instead.)
16
+ endif
17
+
18
+ ifdef WHISPER_CUDA
19
+ GGML_CUDA := 1
20
+ DEPRECATE_WARNING := 1
21
+ endif
22
+
23
+ ifdef WHISPER_KOMPUTE
24
+ GGML_KOMPUTE := 1
25
+ DEPRECATE_WARNING := 1
26
+ endif
27
+
28
+ ifdef WHISPER_METAL
29
+ GGML_METAL := 1
30
+ DEPRECATE_WARNING := 1
31
+ endif
32
+
33
+ ifdef WHISPER_OPENMP
34
+ GGML_OPENMP := 1
35
+ DEPRECATE_WARNING := 1
36
+ endif
37
+
38
+ ifdef WHISPER_RPC
39
+ GGML_RPC := 1
40
+ DEPRECATE_WARNING := 1
41
+ endif
42
+
43
+ ifdef WHISPER_SYCL
44
+ GGML_SYCL := 1
45
+ DEPRECATE_WARNING := 1
46
+ endif
47
+
48
+ ifdef WHISPER_SYCL_F16
49
+ GGML_SYCL_F16 := 1
50
+ DEPRECATE_WARNING := 1
51
+ endif
52
+
53
+ ifdef WHISPER_OPENBLAS
54
+ GGML_OPENBLAS := 1
55
+ DEPRECATE_WARNING := 1
56
+ endif
57
+
58
+ ifdef WHISPER_OPENBLAS64
59
+ GGML_OPENBLAS64 := 1
60
+ DEPRECATE_WARNING := 1
61
+ endif
62
+
63
+ ifdef WHISPER_BLIS
64
+ GGML_BLIS := 1
65
+ DEPRECATE_WARNING := 1
66
+ endif
67
+
68
+ # Deprecated WHISPER_-prefixed alias. The deprecation notice printed further
+ # down names this option WHISPER_NO_LLAMAFILE, so honour that spelling and map
+ # it to the matching GGML_ variable.
+ ifdef WHISPER_NO_LLAMAFILE
+ GGML_NO_LLAMAFILE := 1
+ DEPRECATE_WARNING := 1
+ endif
+
+ # Legacy spelling (appears to be an accidental LLAMAFILE -> WHISPERFILE rename);
+ # kept so that existing invocations continue to behave as before.
+ ifdef WHISPER_NO_WHISPERFILE
69
+ GGML_NO_WHISPERFILE := 1
+ GGML_NO_LLAMAFILE := 1
70
+ DEPRECATE_WARNING := 1
71
+ endif
72
+
73
+ ifdef WHISPER_NO_ACCELERATE
74
+ GGML_NO_ACCELERATE := 1
75
+ DEPRECATE_WARNING := 1
76
+ endif
77
+
78
+ ifdef WHISPER_NO_OPENMP
79
+ GGML_NO_OPENMP := 1
80
+ DEPRECATE_WARNING := 1
81
+ endif
82
+
83
+ ifdef WHISPER_NO_METAL
84
+ GGML_NO_METAL := 1
85
+ DEPRECATE_WARNING := 1
86
+ endif
87
+
88
+ ifndef UNAME_S
89
+ UNAME_S := $(shell uname -s)
90
+ endif
91
+
92
+ ifndef UNAME_P
93
+ UNAME_P := $(shell uname -p)
94
+ endif
95
+
96
+ ifndef UNAME_M
97
+ UNAME_M := $(shell uname -m)
98
+ endif
99
+
100
+ # In GNU make default CXX is g++ instead of c++. Let's fix that so that users
101
+ # of non-gcc compilers don't have to provide g++ alias or wrapper.
102
+ DEFCC := cc
103
+ DEFCXX := c++
104
+ ifeq ($(origin CC),default)
105
+ CC := $(DEFCC)
106
+ endif
107
+ ifeq ($(origin CXX),default)
108
+ CXX := $(DEFCXX)
109
+ endif
110
+
111
+ # Mac OS + Arm can report x86_64
112
+ # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
113
+ ifeq ($(UNAME_S),Darwin)
114
+ ifndef GGML_NO_METAL
115
+ GGML_METAL := 1
116
+ endif
117
+
118
+ GGML_NO_OPENMP := 1
119
+
120
+ ifneq ($(UNAME_P),arm)
121
+ SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
122
+ ifeq ($(SYSCTL_M),1)
123
+ # UNAME_P := arm
124
+ # UNAME_M := arm64
125
+ warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
126
+ endif
127
+ endif
128
+ endif
129
+
130
+ ifdef GGML_METAL
131
+ GGML_METAL_EMBED_LIBRARY := 1
132
+ endif
133
+
134
+ ifdef GGML_RPC
135
+ BUILD_TARGETS += rpc-server
136
+ endif
137
+
138
+ ifeq ($(shell sdl2-config --cflags --libs 2>/dev/null),)
139
+ else
140
+ BUILD_TARGETS += \
141
+ command \
142
+ stream \
143
+ lsp \
144
+ talk \
145
+ talk-llama
146
+ endif
147
+
148
+ default: $(BUILD_TARGETS)
149
+
150
+ test: $(TEST_TARGETS)
151
+ @failures=0; \
152
+ for test_target in $(TEST_TARGETS); do \
153
+ echo "Running test $$test_target..."; \
154
+ ./$$test_target; \
155
+ if [ $$? -ne 0 ]; then \
156
+ printf 'Test %s FAILED!\n\n' $$test_target; \
157
+ failures=$$(( failures + 1 )); \
158
+ else \
159
+ printf 'Test %s passed.\n\n' $$test_target; \
160
+ fi; \
161
+ done; \
162
+ failures=$$(( failures + $$? )); \
163
+ if [ $$failures -gt 0 ]; then \
164
+ printf '\n%s tests failed.\n' $$failures; \
165
+ exit 1; \
166
+ fi
167
+ @echo 'All tests passed.'
168
+
169
+ all: $(BUILD_TARGETS) $(TEST_TARGETS)
170
+
171
+ ifdef RISCV_CROSS_COMPILE
172
+ CC := riscv64-unknown-linux-gnu-gcc
173
+ CXX := riscv64-unknown-linux-gnu-g++
174
+ endif
175
+
176
+ #
177
+ # Compile flags
178
+ #
179
+
180
+ # keep standard at C11 and C++11
181
+ MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Iexamples
182
+ MK_CFLAGS = -std=c11 -fPIC
183
+ MK_CXXFLAGS = -std=c++11 -fPIC
184
+ MK_NVCCFLAGS = -std=c++11
185
+
186
+ ifndef WHISPER_NO_CCACHE
187
+ CCACHE := $(shell which ccache)
188
+ ifdef CCACHE
189
+ export CCACHE_SLOPPINESS = time_macros
190
+ $(info I ccache found, compilation results will be cached. Disable with WHISPER_NO_CCACHE.)
191
+ CC := $(CCACHE) $(CC)
192
+ CXX := $(CCACHE) $(CXX)
193
+ else
194
+ $(info I ccache not found. Consider installing it for faster compilation.)
195
+ endif # CCACHE
196
+ endif # WHISPER_NO_CCACHE
197
+
198
+ # clock_gettime came in POSIX.1b (1993)
199
+ # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
200
+ # posix_memalign came in POSIX.1-2001 / SUSv3
201
+ # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
202
+ MK_CPPFLAGS += -D_XOPEN_SOURCE=600
203
+
204
+ # Somehow in OpenBSD whenever POSIX conformance is specified
205
+ # some string functions rely on locale_t availability,
206
+ # which was introduced in POSIX.1-2008, forcing us to go higher
207
+ ifeq ($(UNAME_S),OpenBSD)
208
+ MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
209
+ endif
210
+
211
+ # Data types, macros and functions related to controlling CPU affinity and
212
+ # some memory allocation are available on Linux through GNU extensions in libc
213
+ ifeq ($(UNAME_S),Linux)
214
+ MK_CPPFLAGS += -D_GNU_SOURCE
215
+ endif
216
+
217
+ # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
218
+ # and on macOS its availability depends on enabling Darwin extensions
219
+ # similarly on DragonFly, enabling BSD extensions is necessary
220
+ ifeq ($(UNAME_S),Darwin)
221
+ MK_CPPFLAGS += -D_DARWIN_C_SOURCE
222
+ endif
223
+ ifeq ($(UNAME_S),DragonFly)
224
+ MK_CPPFLAGS += -D__BSD_VISIBLE
225
+ endif
226
+
227
+ # alloca is a non-standard interface that is not visible on BSDs when
228
+ # POSIX conformance is specified, but not all of them provide a clean way
229
+ # to enable it in such cases
230
+ ifeq ($(UNAME_S),FreeBSD)
231
+ MK_CPPFLAGS += -D__BSD_VISIBLE
232
+ endif
233
+ ifeq ($(UNAME_S),NetBSD)
234
+ MK_CPPFLAGS += -D_NETBSD_SOURCE
235
+ endif
236
+ ifeq ($(UNAME_S),OpenBSD)
237
+ MK_CPPFLAGS += -D_BSD_SOURCE
238
+ endif
239
+
240
+ ifdef GGML_SCHED_MAX_COPIES
241
+ MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES)
242
+ endif
243
+
244
+ ifdef WHISPER_DEBUG
245
+ MK_CFLAGS += -O0 -g
246
+ MK_CXXFLAGS += -O0 -g
247
+ MK_LDFLAGS += -g
248
+ MK_NVCCFLAGS += -O0 -g
249
+
250
+ ifeq ($(UNAME_S),Linux)
251
+ MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
252
+ endif
253
+ else
254
+ MK_CPPFLAGS += -DNDEBUG
255
+ MK_CFLAGS += -O3
256
+ MK_CXXFLAGS += -O3
257
+ MK_NVCCFLAGS += -O3
258
+ endif
259
+
260
+ ifdef WHISPER_SANITIZE_THREAD
261
+ MK_CFLAGS += -fsanitize=thread -g
262
+ MK_CXXFLAGS += -fsanitize=thread -g
263
+ MK_LDFLAGS += -fsanitize=thread -g
264
+ endif
265
+
266
+ ifdef WHISPER_SANITIZE_ADDRESS
267
+ MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
268
+ MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
269
+ MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
270
+ endif
271
+
272
+ ifdef WHISPER_SANITIZE_UNDEFINED
273
+ MK_CFLAGS += -fsanitize=undefined -g
274
+ MK_CXXFLAGS += -fsanitize=undefined -g
275
+ MK_LDFLAGS += -fsanitize=undefined -g
276
+ endif
277
+
278
+ ifdef WHISPER_SERVER_VERBOSE
279
+ MK_CPPFLAGS += -DSERVER_VERBOSE=$(WHISPER_SERVER_VERBOSE)
280
+ endif
281
+
282
+ ifdef WHISPER_SERVER_SSL
283
+ MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
284
+ MK_LDFLAGS += -lssl -lcrypto
285
+ endif
286
+
287
+ ifdef WHISPER_DISABLE_LOGS
288
+ MK_CPPFLAGS += -DLOG_DISABLE_LOGS
289
+ endif # WHISPER_DISABLE_LOGS
290
+
291
+ # warnings
292
+ WARN_FLAGS = \
293
+ -Wall \
294
+ -Wextra \
295
+ -Wpedantic \
296
+ -Wcast-qual \
297
+ -Wno-unused-function
298
+
299
+ MK_CFLAGS += \
300
+ $(WARN_FLAGS) \
301
+ -Wshadow \
302
+ -Wstrict-prototypes \
303
+ -Wpointer-arith \
304
+ -Wmissing-prototypes \
305
+ -Werror=implicit-int \
306
+ -Werror=implicit-function-declaration
307
+
308
+ MK_CXXFLAGS += \
309
+ $(WARN_FLAGS) \
310
+ -Wmissing-declarations \
311
+ -Wmissing-noreturn
312
+
313
+ ifeq ($(WHISPER_FATAL_WARNINGS),1)
314
+ MK_CFLAGS += -Werror
315
+ MK_CXXFLAGS += -Werror
316
+ endif
317
+
318
+ # this version of Apple ld64 is buggy
319
+ ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
320
+ MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
321
+ endif
322
+
323
+ # OS specific
324
+ # TODO: support Windows
325
+ ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
326
+ MK_CFLAGS += -pthread
327
+ MK_CXXFLAGS += -pthread
328
+ endif
329
+
330
+ # detect Windows
331
+ ifneq ($(findstring _NT,$(UNAME_S)),)
332
+ _WIN32 := 1
333
+ endif
334
+
335
+ # library name prefix
336
+ ifneq ($(_WIN32),1)
337
+ LIB_PRE := lib
338
+ endif
339
+
340
+ # Dynamic Shared Object extension
341
+ ifneq ($(_WIN32),1)
342
+ DSO_EXT := .so
343
+ else
344
+ DSO_EXT := .dll
345
+ endif
346
+
347
+ # Windows Sockets 2 (Winsock) for network-capable apps
348
+ ifeq ($(_WIN32),1)
349
+ LWINSOCK2 := -lws2_32
350
+ endif
351
+
352
+ ifdef WHISPER_GPROF
353
+ MK_CFLAGS += -pg
354
+ MK_CXXFLAGS += -pg
355
+ endif
356
+
357
+ # Architecture specific
358
+ # TODO: probably these flags need to be tweaked on some architectures
359
+ # feel free to update the Makefile for your architecture and send a pull request or issue
360
+
361
+ ifndef RISCV
362
+
363
+ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
364
+ # Use all CPU extensions that are available:
365
+ MK_CFLAGS += -march=native -mtune=native
366
+ HOST_CXXFLAGS += -march=native -mtune=native
367
+
368
+ # To use AVX only, uncomment:
369
+ #MK_CFLAGS += -mfma -mf16c -mavx
370
+ #MK_CXXFLAGS += -mfma -mf16c -mavx
371
+
372
+ # To use SSSE3 only (note: SSSE3, not SSE3!), uncomment:
373
+ #MK_CFLAGS += -mssse3
374
+ #MK_CXXFLAGS += -mssse3
375
+ endif
376
+
377
+ ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
378
+ # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
379
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
380
+ # https://github.com/ggerganov/llama.cpp/issues/2922
381
+ MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
382
+ MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
383
+
384
+ # Target Windows 8 for PrefetchVirtualMemory
385
+ MK_CPPFLAGS += -D_WIN32_WINNT=0x602
386
+ endif
387
+
388
+ # 64-bit ARM hosts (uname -m starts with "aarch64"): tune for the build machine
+ # and switch to the cross toolchain when a Jetson TX2 module is detected.
+ ifneq ($(filter aarch64%,$(UNAME_M)),)
389
+ # Apple M1, M2, etc.
390
+ # Raspberry Pi 3, 4, Zero 2 (64-bit)
391
+ # Nvidia Jetson
392
+ MK_CFLAGS += -mcpu=native
393
+ MK_CXXFLAGS += -mcpu=native
394
+ # Simple (:=) assignment: run jetson_release once at parse time so that the
+ # ifdef below really tests its output (ifdef does not expand a recursive
+ # variable). stderr is dropped for hosts where the tool is not installed.
+ JETSON_RELEASE_INFO := $(shell jetson_release 2>/dev/null)
395
+ ifdef JETSON_RELEASE_INFO
396
+ ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
397
+ JETSON_EOL_MODULE_DETECT = 1
398
+ CC = aarch64-unknown-linux-gnu-gcc
399
+ # Variable names are case-sensitive: this must be CXX (not cxx) for the
+ # C++ compiler to be switched together with CC.
+ CXX = aarch64-unknown-linux-gnu-g++
400
+ endif
401
+ endif
402
+ endif
403
+
404
+ ifneq ($(filter armv6%,$(UNAME_M)),)
405
+ # Raspberry Pi 1, Zero
406
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
407
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
408
+ endif
409
+
410
+ ifneq ($(filter armv7%,$(UNAME_M)),)
411
+ # Raspberry Pi 2
412
+ MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
413
+ MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
414
+ endif
415
+
416
+ ifneq ($(filter armv8%,$(UNAME_M)),)
417
+ # Raspberry Pi 3, 4, Zero 2 (32-bit)
418
+ MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
419
+ MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
420
+ endif
421
+
422
+ ifneq ($(filter ppc64%,$(UNAME_M)),)
423
+ POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
424
+ ifneq (,$(findstring POWER9,$(POWER9_M)))
425
+ MK_CFLAGS += -mcpu=power9
426
+ MK_CXXFLAGS += -mcpu=power9
427
+ endif
428
+ endif
429
+
430
+ ifneq ($(filter ppc64le%,$(UNAME_M)),)
431
+ MK_CFLAGS += -mcpu=powerpc64le
432
+ MK_CXXFLAGS += -mcpu=powerpc64le
433
+ CUDA_POWER_ARCH = 1
434
+ endif
435
+
436
+ ifneq ($(filter loongarch64%,$(UNAME_M)),)
437
+ MK_CFLAGS += -mlasx
438
+ MK_CXXFLAGS += -mlasx
439
+ endif
440
+
441
+ else
442
+ MK_CFLAGS += -march=rv64gcv -mabi=lp64d
443
+ MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
444
+ endif
445
+
446
+ ifndef GGML_NO_ACCELERATE
447
+ # Mac OS - include Accelerate framework.
448
+ # `-framework Accelerate` works both with Apple Silicon and Mac Intel
449
+ ifeq ($(UNAME_S),Darwin)
450
+ MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
451
+ MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
452
+ MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
453
+ MK_LDFLAGS += -framework Accelerate
454
+ OBJ_GGML += ggml/src/ggml-blas.o
455
+ endif
456
+ endif # GGML_NO_ACCELERATE
457
+
458
+ ifndef GGML_NO_OPENMP
459
+ MK_CPPFLAGS += -DGGML_USE_OPENMP
460
+ MK_CFLAGS += -fopenmp
461
+ MK_CXXFLAGS += -fopenmp
462
+ endif # GGML_NO_OPENMP
463
+
464
+ ifdef GGML_OPENBLAS
465
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
466
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
467
+ MK_LDFLAGS += $(shell pkg-config --libs openblas)
468
+ OBJ_GGML += ggml/src/ggml-blas.o
469
+ endif # GGML_OPENBLAS
470
+
471
+ ifdef GGML_OPENBLAS64
472
+ MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
473
+ MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
474
+ MK_LDFLAGS += $(shell pkg-config --libs openblas64)
475
+ OBJ_GGML += ggml/src/ggml-blas.o
476
+ endif # GGML_OPENBLAS64
477
+
478
+ ifdef GGML_BLIS
479
+ MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
480
+ MK_LDFLAGS += -lblis -L/usr/local/lib
481
+ OBJ_GGML += ggml/src/ggml-blas.o
482
+ endif # GGML_BLIS
483
+
484
+ ifdef GGML_RPC
485
+ MK_CPPFLAGS += -DGGML_USE_RPC
486
+ OBJ_GGML += ggml/src/ggml-rpc.o
487
+ endif # GGML_RPC
488
+
489
+ OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
490
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
491
+
492
+ ifdef GGML_CUDA_FA_ALL_QUANTS
493
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
494
+ else
495
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
496
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
497
+ OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
498
+ endif # GGML_CUDA_FA_ALL_QUANTS
499
+
500
+ ifdef GGML_CUDA
501
+ ifneq ('', '$(wildcard /opt/cuda)')
502
+ CUDA_PATH ?= /opt/cuda
503
+ else
504
+ CUDA_PATH ?= /usr/local/cuda
505
+ endif
506
+
507
+ #MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
508
+ #MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcufft -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
509
+ MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
510
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
511
+ MK_NVCCFLAGS += -use_fast_math
512
+
513
+ OBJ_GGML += ggml/src/ggml-cuda.o
514
+ OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
515
+ OBJ_GGML += $(OBJ_CUDA_TMPL)
516
+
517
+ #OBJ_WHISPER += src/whisper-mel-cuda.o
518
+
519
+ ifdef WHISPER_FATAL_WARNINGS
520
+ MK_NVCCFLAGS += -Werror all-warnings
521
+ endif # WHISPER_FATAL_WARNINGS
522
+
523
+ ifndef JETSON_EOL_MODULE_DETECT
524
+ MK_NVCCFLAGS += --forward-unknown-to-host-compiler
525
+ endif # JETSON_EOL_MODULE_DETECT
526
+
527
+ ifdef WHISPER_DEBUG
528
+ MK_NVCCFLAGS += -lineinfo
529
+ endif # WHISPER_DEBUG
530
+
531
+ ifdef GGML_CUDA_DEBUG
532
+ MK_NVCCFLAGS += --device-debug
533
+ endif # GGML_CUDA_DEBUG
534
+
535
+ ifdef GGML_CUDA_NVCC
536
+ NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
537
+ else
538
+ NVCC = $(CCACHE) nvcc
539
+ endif #GGML_CUDA_NVCC
540
+
541
+ ifdef CUDA_DOCKER_ARCH
542
+ MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
543
+ else ifndef CUDA_POWER_ARCH
544
+ MK_NVCCFLAGS += -arch=native
545
+ endif # CUDA_DOCKER_ARCH
546
+
547
+ ifdef GGML_CUDA_FORCE_DMMV
548
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
549
+ endif # GGML_CUDA_FORCE_DMMV
550
+
551
+ ifdef GGML_CUDA_FORCE_MMQ
552
+ MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
553
+ endif # GGML_CUDA_FORCE_MMQ
554
+
555
+ ifdef GGML_CUDA_DMMV_X
556
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
557
+ else
558
+ MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
559
+ endif # GGML_CUDA_DMMV_X
560
+
561
+ ifdef GGML_CUDA_MMV_Y
562
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
563
+ else ifdef GGML_CUDA_DMMV_Y
564
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_DMMV_Y) # for backwards compatibility
565
+ else
566
+ MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
567
+ endif # GGML_CUDA_MMV_Y
568
+
569
+ ifdef GGML_CUDA_F16
570
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
571
+ endif # GGML_CUDA_F16
572
+
573
+ ifdef GGML_CUDA_DMMV_F16
574
+ MK_NVCCFLAGS += -DGGML_CUDA_F16
575
+ endif # GGML_CUDA_DMMV_F16
576
+
577
+ ifdef GGML_CUDA_KQUANTS_ITER
578
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
579
+ else
580
+ MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
581
+ endif
582
+
583
+ ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
584
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
585
+ else
586
+ MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
587
+ endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
588
+
589
+ ifdef GGML_CUDA_NO_PEER_COPY
590
+ MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
591
+ endif # GGML_CUDA_NO_PEER_COPY
592
+
593
+ ifdef GGML_CUDA_CCBIN
594
+ MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN)
595
+ endif # GGML_CUDA_CCBIN
596
+
597
+ ifdef GGML_CUDA_FA_ALL_QUANTS
598
+ MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
599
+ endif # GGML_CUDA_FA_ALL_QUANTS
600
+
601
+ ifdef JETSON_EOL_MODULE_DETECT
602
+ define NVCC_COMPILE
603
+ $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
604
+ endef # NVCC_COMPILE
605
+ else
606
+ define NVCC_COMPILE
607
+ $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
608
+ endef # NVCC_COMPILE
609
+ endif # JETSON_EOL_MODULE_DETECT
610
+
611
+ ggml/src/ggml-cuda/%.o: \
612
+ ggml/src/ggml-cuda/%.cu \
613
+ ggml/include/ggml.h \
614
+ ggml/src/ggml-common.h \
615
+ ggml/src/ggml-cuda/common.cuh
616
+ $(NVCC_COMPILE)
617
+
618
+ ggml/src/ggml-cuda.o: \
619
+ ggml/src/ggml-cuda.cu \
620
+ ggml/include/ggml.h \
621
+ ggml/include/ggml-backend.h \
622
+ ggml/include/ggml-cuda.h \
623
+ ggml/src/ggml-backend-impl.h \
624
+ ggml/src/ggml-common.h \
625
+ $(wildcard ggml/src/ggml-cuda/*.cuh)
626
+ $(NVCC_COMPILE)
627
+
628
+ #src/whisper-mel-cuda.o: src/whisper-mel-cuda.cu src/whisper-mel-cuda.hpp
629
+ # $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
630
+
631
+ endif # GGML_CUDA
632
+
633
+ ifdef GGML_VULKAN
634
+ MK_CPPFLAGS += -DGGML_USE_VULKAN
635
+ MK_LDFLAGS += -lvulkan
636
+ OBJ_GGML += ggml/src/ggml-vulkan.o
637
+
638
+ ifdef GGML_VULKAN_CHECK_RESULTS
639
+ MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
640
+ endif
641
+
642
+ ifdef GGML_VULKAN_DEBUG
643
+ MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
644
+ endif
645
+
646
+ ifdef GGML_VULKAN_MEMORY_DEBUG
647
+ MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
648
+ endif
649
+
650
+ ifdef GGML_VULKAN_VALIDATE
651
+ MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
652
+ endif
653
+
654
+ ifdef GGML_VULKAN_RUN_TESTS
655
+ MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
656
+ endif
657
+
658
+ ggml/src/ggml-vulkan.o: \
659
+ ggml/src/ggml-vulkan.cpp \
660
+ ggml/include/ggml-vulkan.h
661
+ $(CXX) $(CXXFLAGS) -c $< -o $@
662
+ endif # GGML_VULKAN
663
+
664
+ ifdef GGML_HIPBLAS
665
+ ifeq ($(wildcard /opt/rocm),)
666
+ ROCM_PATH ?= /usr
667
+ AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
668
+ else
669
+ ROCM_PATH ?= /opt/rocm
670
+ AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
671
+ endif
672
+
673
+ GGML_CUDA_DMMV_X ?= 32
674
+ GGML_CUDA_MMV_Y ?= 1
675
+ GGML_CUDA_KQUANTS_ITER ?= 2
676
+
677
+ MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
678
+
679
+ ifdef GGML_HIP_UMA
680
+ MK_CPPFLAGS += -DGGML_HIP_UMA
681
+ endif # GGML_HIP_UMA
682
+
683
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
684
+ MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
685
+ MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
686
+
687
+ HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
688
+
689
+ HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
690
+ HIPFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
691
+ HIPFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
692
+ HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
693
+
694
+ ifdef GGML_CUDA_FORCE_DMMV
695
+ HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
696
+ endif # GGML_CUDA_FORCE_DMMV
697
+
698
+ ifdef GGML_CUDA_NO_PEER_COPY
699
+ HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
700
+ endif # GGML_CUDA_NO_PEER_COPY
701
+
702
+ OBJ_GGML += ggml/src/ggml-cuda.o
703
+ OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
704
+ OBJ_GGML += $(OBJ_CUDA_TMPL)
705
+
706
+ ggml/src/ggml-cuda.o: \
707
+ ggml/src/ggml-cuda.cu \
708
+ ggml/include/ggml.h \
709
+ ggml/include/ggml-backend.h \
710
+ ggml/include/ggml-cuda.h \
711
+ ggml/src/ggml-backend-impl.h \
712
+ ggml/src/ggml-common.h \
713
+ $(wildcard ggml/src/ggml-cuda/*.cuh)
714
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
715
+
716
+ ggml/src/ggml-cuda/%.o: \
717
+ ggml/src/ggml-cuda/%.cu \
718
+ ggml/include/ggml.h \
719
+ ggml/src/ggml-common.h \
720
+ ggml/src/ggml-cuda/common.cuh
721
+ $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
722
+ endif # GGML_HIPBLAS
723
+
724
+ ifdef GGML_METAL
725
+ MK_CPPFLAGS += -DGGML_USE_METAL
726
+ MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
727
+ OBJ_GGML += ggml/src/ggml-metal.o
728
+ ifdef GGML_METAL_NDEBUG
729
+ MK_CPPFLAGS += -DGGML_METAL_NDEBUG
730
+ endif
731
+
732
+ ifdef GGML_METAL_EMBED_LIBRARY
733
+ MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
734
+ OBJ_GGML += ggml/src/ggml-metal-embed.o
735
+ endif
736
+ endif # GGML_METAL
737
+
738
+ # Core ML encoder support: define WHISPER_USE_COREML for the C++ sources and
+ # link the Foundation and CoreML frameworks.
+ ifdef WHISPER_COREML
739
+ MK_CXXFLAGS += -DWHISPER_USE_COREML
740
+ # Use MK_LDFLAGS rather than LDFLAGS: a plain "LDFLAGS +=" is ignored when the
+ # user passes LDFLAGS on the command line, whereas MK_LDFLAGS is always merged
+ # in by the "override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)" step later on.
+ MK_LDFLAGS += -framework Foundation -framework CoreML
741
+
742
+ # Optionally let the code fall back when the Core ML model cannot be used.
+ ifdef WHISPER_COREML_ALLOW_FALLBACK
743
+ MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
744
+ endif
745
+ endif
746
+
747
+ # ===
748
+
749
+ ifdef GGML_METAL
750
+ ggml/src/ggml-metal.o: \
751
+ ggml/src/ggml-metal.m \
752
+ ggml/include/ggml-metal.h \
753
+ ggml/include/ggml.h
754
+ $(CC) $(CFLAGS) -c $< -o $@
755
+
756
+ ifdef GGML_METAL_EMBED_LIBRARY
757
+ ggml/src/ggml-metal-embed.o: \
758
+ ggml/src/ggml-metal.metal \
759
+ ggml/src/ggml-common.h
760
+ @echo "Embedding Metal library"
761
+ @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
762
+ $(eval TEMP_ASSEMBLY=$(shell mktemp))
763
+ @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
764
+ @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
765
+ @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
766
+ @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
767
+ @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
768
+ @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
769
+ @$(AS) $(TEMP_ASSEMBLY) -o $@
770
+ @rm -f ${TEMP_ASSEMBLY}
771
+ endif
772
+ endif # GGML_METAL
773
+
774
+ ifdef WHISPER_COREML
775
+ src/coreml/whisper-encoder.o: src/coreml/whisper-encoder.mm src/coreml/whisper-encoder.h
776
+ $(CXX) -O3 -I . -fobjc-arc -c src/coreml/whisper-encoder.mm -o src/coreml/whisper-encoder.o
777
+
778
+ src/coreml/whisper-encoder-impl.o: src/coreml/whisper-encoder-impl.m src/coreml/whisper-encoder-impl.h
779
+ $(CXX) -O3 -I . -fobjc-arc -c src/coreml/whisper-encoder-impl.m -o src/coreml/whisper-encoder-impl.o
780
+
781
+ OBJ_WHISPER += src/coreml/whisper-encoder.o src/coreml/whisper-encoder-impl.o
782
+ endif
783
+
784
+ OBJ_GGML += \
785
+ ggml/src/ggml.o \
786
+ ggml/src/ggml-alloc.o \
787
+ ggml/src/ggml-backend.o \
788
+ ggml/src/ggml-quants.o \
789
+ ggml/src/ggml-aarch64.o
790
+
791
+ OBJ_WHISPER += \
792
+ src/whisper.o
793
+
794
+ OBJ_COMMON += \
795
+ examples/common.o \
796
+ examples/common-ggml.o \
797
+ examples/grammar-parser.o
798
+
799
+ OBJ_SDL += \
800
+ examples/common-sdl.o
801
+
802
+ OBJ_ALL = $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
803
+
804
+ LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT)
805
+ LIB_GGML_S = $(LIB_PRE)ggml.a
806
+
807
+ LIB_WHISPER = $(LIB_PRE)whisper$(DSO_EXT)
808
+ LIB_WHISPER_S = $(LIB_PRE)whisper.a
809
+
810
+ LIB_COMMON = $(LIB_PRE)common$(DSO_EXT)
811
+ LIB_COMMON_S = $(LIB_PRE)common.a
812
+
813
+ LIB_COMMON_SDL = $(LIB_PRE)common-sdl$(DSO_EXT)
814
+ LIB_COMMON_SDL_S = $(LIB_PRE)common-sdl.a
815
+
816
+ LIB_ALL = $(LIB_GGML) $(LIB_WHISPER) $(LIB_COMMON) $(LIB_COMMON_SDL)
817
+ LIB_ALL_S = $(LIB_GGML_S) $(LIB_WHISPER_S) $(LIB_COMMON_S) $(LIB_COMMON_SDL_S)
818
+
819
+ GF_CC := $(CC)
820
+ include scripts/get-flags.mk
821
+
822
+ # combine build flags with cmdline overrides
823
+ override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
824
+ override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
825
+ BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
826
+ override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
827
+ override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
828
+ override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
829
+
830
+ # identify CUDA host compiler
831
+ ifdef GGML_CUDA
832
+ GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
833
+ include scripts/get-flags.mk
834
+ CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
835
+ endif
836
+
837
+ ifdef WHISPER_CURL
838
+ override CXXFLAGS := $(CXXFLAGS) -DWHISPER_USE_CURL
839
+ override LDFLAGS := $(LDFLAGS) -lcurl
840
+ endif
841
+
842
+ #
843
+ # Print build information
844
+ #
845
+
846
+ $(info I whisper.cpp build info: )
847
+ $(info I UNAME_S: $(UNAME_S))
848
+ $(info I UNAME_P: $(UNAME_P))
849
+ $(info I UNAME_M: $(UNAME_M))
850
+ $(info I CFLAGS: $(CFLAGS))
851
+ $(info I CXXFLAGS: $(CXXFLAGS))
852
+ $(info I NVCCFLAGS: $(NVCCFLAGS))
853
+ $(info I LDFLAGS: $(LDFLAGS))
854
+ $(info I CC: $(shell $(CC) --version | head -n 1))
855
+ $(info I CXX: $(shell $(CXX) --version | head -n 1))
856
+ ifdef GGML_CUDA
857
+ $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
858
+ CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
859
+ ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
860
+
861
+ ifndef CUDA_DOCKER_ARCH
862
+ ifndef CUDA_POWER_ARCH
863
+ $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
864
+ endif # CUDA_POWER_ARCH
865
+ endif # CUDA_DOCKER_ARCH
866
+
867
+ endif # CUDA_VERSION < 11.7 (awk comparison above)
868
+ endif # GGML_CUDA
869
+ $(info )
870
+
871
+ # Printed at parse time when any deprecated WHISPER_ alias handled at the top of
+ # this file was set. The list mirrors those aliases (WHISPER_KOMPUTE included).
+ ifdef DEPRECATE_WARNING
872
+ $(info !!! DEPRECATION WARNING !!!)
873
+ $(info The following WHISPER_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead)
874
+ $(info - WHISPER_CUDA)
+ $(info - WHISPER_KOMPUTE)
875
+ $(info - WHISPER_METAL)
876
+ $(info - WHISPER_OPENMP)
877
+ $(info - WHISPER_RPC)
878
+ $(info - WHISPER_SYCL)
879
+ $(info - WHISPER_SYCL_F16)
880
+ $(info - WHISPER_OPENBLAS)
881
+ $(info - WHISPER_OPENBLAS64)
882
+ $(info - WHISPER_BLIS)
883
+ $(info - WHISPER_NO_LLAMAFILE)
884
+ $(info - WHISPER_NO_ACCELERATE)
885
+ $(info - WHISPER_NO_OPENMP)
886
+ $(info - WHISPER_NO_METAL)
887
+ $(info )
888
+ endif
889
+
890
+ #
891
+ # Build libraries
892
+ #
893
+
894
+ # ggml
895
+
896
# ggml C objects.
# In each rule the first prerequisite is the C source that gets compiled ($<);
# the remaining prerequisites are headers listed so that editing them triggers
# a rebuild of the object ($@).

ggml/src/ggml.o: ggml/src/ggml.c ggml/include/ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-alloc.o: ggml/src/ggml-alloc.c \
                       ggml/include/ggml.h ggml/include/ggml-alloc.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-backend.o: ggml/src/ggml-backend.c \
                         ggml/include/ggml.h ggml/include/ggml-backend.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-quants.o: ggml/src/ggml-quants.c \
                        ggml/include/ggml.h ggml/src/ggml-quants.h ggml/src/ggml-common.h
	$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-aarch64.o: ggml/src/ggml-aarch64.c \
                         ggml/include/ggml.h ggml/src/ggml-aarch64.h ggml/src/ggml-common.h
	$(CC) $(CFLAGS) -c $< -o $@
926
+
927
# ggml BLAS object: a C++ source, so it is built with $(CXX)/$(CXXFLAGS).
ggml/src/ggml-blas.o: ggml/src/ggml-blas.cpp ggml/include/ggml-blas.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
931
+
932
# The sgemm object rule is only defined when GGML_LLAMAFILE is set.
ifdef GGML_LLAMAFILE
ggml/src/sgemm.o: ggml/src/sgemm.cpp ggml/src/sgemm.h ggml/include/ggml.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # GGML_LLAMAFILE
939
+
940
# The ggml-rpc object rule is only defined when GGML_RPC is set.
ifdef GGML_RPC
ggml/src/ggml-rpc.o: ggml/src/ggml-rpc.cpp ggml/include/ggml-rpc.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
endif # GGML_RPC
946
+
947
# Shared ggml library: link all ggml objects ($^) into $@.
$(LIB_GGML): $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

# Static ggml archive built from the same objects.
# $(AR) (make's built-in default is `ar`) is used instead of a hard-coded `ar`
# so that a cross or toolchain-specific archiver can be selected, e.g.
# `make AR=aarch64-linux-gnu-ar`.
$(LIB_GGML_S): $(OBJ_GGML)
	$(AR) rcs $@ $^
954
+
955
+ # whisper
956
+
957
# whisper object. src/whisper.cpp is the compiled source ($<); the headers that
# follow are listed only so that changing any of them rebuilds the object.
src/whisper.o: src/whisper.cpp src/whisper-mel.hpp include/whisper.h \
               ggml/include/ggml.h ggml/include/ggml-alloc.h ggml/include/ggml-backend.h \
               ggml/include/ggml-cuda.h ggml/include/ggml-metal.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
967
+
968
# Shared whisper library. $(LIB_GGML) is a prerequisite and, being part of $^,
# is also passed on the link line.
$(LIB_WHISPER): $(OBJ_WHISPER) $(LIB_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

# Static whisper archive (whisper objects only).
# $(AR) (default `ar`) replaces the hard-coded archiver name so that the
# archiver can be overridden together with CC/CXX for cross builds.
$(LIB_WHISPER_S): $(OBJ_WHISPER)
	$(AR) rcs $@ $^
976
+
977
+ # common
978
+
979
# Helper objects shared by the example programs; each is compiled from its
# .cpp ($<) and rebuilt when the matching header changes.

examples/common.o: examples/common.cpp examples/common.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

examples/common-ggml.o: examples/common-ggml.cpp examples/common-ggml.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
988
+
989
# Shared library built from the common example objects.
$(LIB_COMMON): $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

# Static archive of the same objects.
# Uses $(AR) (default `ar`) rather than a hard-coded `ar` so the archiver can
# be overridden from the command line or environment.
$(LIB_COMMON_S): $(OBJ_COMMON)
	$(AR) rcs $@ $^
996
+
997
+ # common-sdl
998
+
999
# SDL2 compile and link flags, obtained from sdl2-config.
# Note: these are recursive (=) assignments, so the $(shell ...) call is
# evaluated each time the variable is expanded, not when this line is parsed.
CFLAGS_SDL  = $(shell sdl2-config --cflags)
LDFLAGS_SDL = $(shell sdl2-config --libs)

# SDL helper object for the examples; compiled with the SDL2 compile flags.
examples/common-sdl.o: examples/common-sdl.cpp examples/common-sdl.h
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
1006
+
1007
# Shared library of the SDL helper objects; links against SDL2 via $(LDFLAGS_SDL).
$(LIB_COMMON_SDL): $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) $(LDFLAGS_SDL)

# Static archive of the same objects.
# Uses $(AR) (default `ar`) rather than a hard-coded `ar` so the archiver can
# be overridden for cross or toolchain-specific builds.
$(LIB_COMMON_SDL_S): $(OBJ_SDL)
	$(AR) rcs $@ $^
1014
+
1015
# Remove build products: the example/test binaries, libraries, object files,
# *.dot files and the generated ggml-metal-embed.metal.
# Declared phony so that a file named `clean` can never make this target look
# up to date (re-declaring it is harmless if it is also listed elsewhere).
# Each path is removed exactly once; `rm -f` keeps missing files and unmatched
# globs from being errors.
.PHONY: clean
clean:
	rm -rvf $(BUILD_TARGETS) $(TEST_TARGETS)
	rm -rvf *.a *.dll *.so *.dot
	rm -rvf ggml/*.a ggml/*.dll ggml/*.so
	rm -rvf src/*.o src/coreml/*.o tests/*.o
	rm -rvf ggml/src/*.o
	rm -rvf ggml/src/ggml-metal-embed.metal
	rm -rvf ggml/src/ggml-cuda/*.o
	rm -rvf ggml/src/ggml-cuda/template-instances/*.o
	find examples -type f -name "*.o" -delete
1035
+
1036
+ #
1037
+ # Examples
1038
+ #
1039
+
1040
+ # $< is the first prerequisite, i.e. the source file.
1041
+ # Explicitly compile this to an object file so that it can be cached with ccache.
1042
+ # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
1043
+
1044
# GET_OBJ_FILE: map each word of $(1) ending in .c, .cpp or .cu to the same
# name ending in .o; other words pass through unchanged.
# Usage: $(call GET_OBJ_FILE,<file list>)
# Must stay a recursive (=) variable so that $(1) is expanded at call time.
GET_OBJ_FILE = $(patsubst %.cu,%.o,$(patsubst %.cpp,%.o,$(patsubst %.c,%.o,$(1))))
1046
+
1047
# Build ./main from examples/main/main.cpp.
# Step 1 compiles the source ($<) to an object next to it; step 2 links that
# object with the remaining prerequisites (headers and the source itself are
# filtered out of $^).
main: examples/main/main.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS)
1051
+
1052
# Build ./bench from examples/bench/bench.cpp: compile the source ($<) to an
# object, then link it with the ggml, whisper and common objects.
bench: examples/bench/bench.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS)
1056
+
1057
# Build ./quantize from examples/quantize/quantize.cpp: compile the source ($<)
# to an object, then link it with the ggml, whisper and common objects.
quantize: examples/quantize/quantize.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS)
1061
+
1062
# Build ./server from examples/server/server.cpp. Same compile-then-link scheme
# as the other examples, with $(LWINSOCK2) appended to the link line.
server: examples/server/server.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
	$(CXX) $(CXXFLAGS) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS) $(LWINSOCK2)
1066
+
1067
# Build ./command from examples/command/command.cpp. The source is compiled
# with the SDL2 compile flags and the binary is linked with the SDL helper
# objects and $(LDFLAGS_SDL).
command: examples/command/command.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1071
+
1072
# Build ./stream from examples/stream/stream.cpp. The source is compiled with
# the SDL2 compile flags and the binary is linked with the SDL helper objects
# and $(LDFLAGS_SDL).
stream: examples/stream/stream.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1076
+
1077
# Build ./lsp from examples/lsp/lsp.cpp. The source is compiled with the SDL2
# compile flags and the binary is linked with the SDL helper objects and
# $(LDFLAGS_SDL).
lsp: examples/lsp/lsp.cpp $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1081
+
1082
# Build ./talk. Only the first source (talk.cpp, $<) is precompiled to an
# object with the SDL2 compile flags; gpt-2.cpp stays in $^ and is therefore
# compiled as part of the link command.
talk: examples/talk/talk.cpp \
      examples/talk/gpt-2.cpp \
      $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1086
+
1087
# Build ./talk-llama. Only the first source (talk-llama.cpp, $<) is precompiled
# to an object with the SDL2 compile flags; the other .cpp files stay in $^ and
# are compiled as part of the link command.
talk-llama: examples/talk-llama/talk-llama.cpp \
            examples/talk-llama/llama.cpp \
            examples/talk-llama/llama-vocab.cpp \
            examples/talk-llama/llama-grammar.cpp \
            examples/talk-llama/llama-sampling.cpp \
            examples/talk-llama/unicode.cpp \
            examples/talk-llama/unicode-data.cpp \
            $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1091
+
1092
+ #
1093
+ # Tests
1094
+ #
1095
+
1096
# Aggregate target that builds every test program.
# Declared phony because its name coincides with the tests/ directory used by
# the rules below; without this, make treats that directory as the target file.
.PHONY: tests
tests: $(TEST_TARGETS)
1097
+
1098
# Compile the C test source; include/whisper.h is listed only as a rebuild
# trigger and is not passed to the compiler.
tests/test-c.o: tests/test-c.c \
                include/whisper.h
	$(CC) $(CFLAGS) -c $< -o $@
1100
+
1101
# Build tests/test-backend-ops: compile the test source ($<) to an object, then
# link it with the ggml objects.
tests/test-backend-ops: tests/test-backend-ops.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(<:.cpp=.o)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(<:.cpp=.o) -o $@ $(LDFLAGS)
1105
+
1106
+ #
1107
+ # Audio samples
1108
+ #
1109
+
1110
# Download a few audio samples into ./samples and convert each one to a
# 16 kHz, mono, 16-bit PCM WAV file. The downloaded originals are removed once
# converted; the a13 clip is additionally cut to its first 30 seconds.
# The repeated wget/ffmpeg arguments live in target-specific variables.
.PHONY: samples
samples: samples_wget   := wget --quiet --show-progress -O
samples: samples_ffmpeg := ffmpeg -loglevel -0 -y -i
samples: samples_pcm16  := -ar 16000 -ac 1 -c:a pcm_s16le
samples:
	@echo "Downloading samples..."
	@mkdir -p samples
	@$(samples_wget) samples/gb0.ogg https://upload.wikimedia.org/wikipedia/commons/2/22/George_W._Bush%27s_weekly_radio_address_%28November_1%2C_2008%29.oga
	@$(samples_wget) samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
	@$(samples_wget) samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
	@$(samples_wget) samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
	@$(samples_wget) samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
	@$(samples_wget) samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
	@echo "Converting to 16-bit WAV ..."
	@$(samples_ffmpeg) samples/gb0.ogg $(samples_pcm16) samples/gb0.wav
	@$(samples_ffmpeg) samples/gb1.ogg $(samples_pcm16) samples/gb1.wav
	@$(samples_ffmpeg) samples/hp0.ogg $(samples_pcm16) samples/hp0.wav
	@rm samples/*.ogg
	@$(samples_ffmpeg) samples/mm1.wav $(samples_pcm16) samples/mm0.wav
	@rm samples/mm1.wav
	@$(samples_ffmpeg) samples/a13.mp3 $(samples_pcm16) -ss 00:00:00 -to 00:00:30 samples/a13.wav
	@rm samples/a13.mp3
	@$(samples_ffmpeg) samples/diffusion2023-07-03.flac $(samples_pcm16) samples/diffusion2023-07-03.wav
	@rm samples/diffusion2023-07-03.flac
1132
+
1133
+ #
1134
+ # Models
1135
+ #
1136
+
1137
# Model targets: `make <model>` (e.g. `make base.en`) invokes
# models/download-ggml-model.sh for the named model (which is expected to skip
# the download when the model is already present) and then runs ./main with
# that model on every WAV file in ./samples.
#
# The model file passed to ./main is models/ggml-<model>.bin, where <model> is
# the target name ($@).

.PHONY: tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3

tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
	bash ./models/download-ggml-model.sh $@
	@echo ""
	@echo "==============================================="
	@echo "Running $@ on all samples in ./samples ..."
	@echo "==============================================="
	@echo ""
	@for f in samples/*.wav; do \
		echo "----------------------------------------------" ; \
		echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
		echo "----------------------------------------------" ; \
		echo "" ; \
		./main -m models/ggml-$@.bin -f "$$f" ; \
		echo "" ; \
	done