diff --git a/PolyML/PolyML.vcxproj b/PolyML/PolyML.vcxproj
index 4f1940d1..5d6cb23a 100644
--- a/PolyML/PolyML.vcxproj
+++ b/PolyML/PolyML.vcxproj
@@ -1,1163 +1,1163 @@
Debug32in64ARM64Debug32in64Win32Debug32in64x64DebugInt32in64ARM64DebugInt32in64Win32DebugInt32in64x64DebugInterpretedARM64DebugARM64DebugWin32DebugInterpretedWin32DebugInterpretedx64Release32in64ARM64ReleaseInt32in64ARM64ReleaseInt32in64Win32ReleaseInt32in64x64ReleaseInterpretedARM64ReleaseInterpretedWin32ReleaseInterpretedx64Release32in64Win32Release32in64x64ReleaseARM64ReleaseWin32Debugx64Releasex64{0326c47a-00af-42cb-b87d-0369a241b570}{0ba5d5b5-f85b-4c49-8a27-67186fa68922}{1ba3e7a2-d64f-4ce3-9fe5-7846b855c19f}falsefalsefalsefalsefalsefalsefalsefalsetruetruefalsefalsefalsefalsefalsefalsetruetruetruetruefalsefalsetruetruecd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.objtruetruetruetruetruetruetruetruefalsefalsetruetruetruetruetruetruefalsefalsefalsefalsetruetruefalsefalsecd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
cd ..
-$(OutDir)PolyImport.exe -H 32 "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
+$(OutDir)PolyImport.exe "%(FullPath)" -o PolyML\$(IntDir)polyexport.obj < bootstrap\Stage1.sml
$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj$(IntDir)polyexport.obj{DF3B373E-67DF-4AB4-8B1E-F54C5810E2CF}Win32ProjPolyML10.0Applicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationtruev142UnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodeApplicationfalsev142trueUnicodetruetruetruetruetruetruetruetruetruetruetruetruefalsefalsefalsefalsefalsefalsefalsefalsefalsefalsefalsefalseNotUsingLevel3DisabledWIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3DisabledWIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3DisabledWIN32;_DEBUG;_WINDOWS;%(Preprocesso
rDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3DisabledWIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)WindowsDebugFulllibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.
lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseNotUsingLevel3Disabled_DEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruelibcmtd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalseLevel3NotUsingMaxSpeedtruetrueWIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueWIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueWIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueWIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDep
endencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)true6.0falseLevel3NotUsingMaxSpeedtruetrueNDEBUG;_WINDOWS;%(PreprocessorDefinitions)Windowstruetruetruelibcmt.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)truefalse
\ No newline at end of file
diff --git a/libpolyml/check_objects.cpp b/libpolyml/check_objects.cpp
index 391f8dbe..e142bc2d 100644
--- a/libpolyml/check_objects.cpp
+++ b/libpolyml/check_objects.cpp
@@ -1,169 +1,168 @@
/*
Title: Validate addresses in objects.
- Copyright (c) 2006, 2012, 2017
- David C.J. Matthews
+ Copyright (c) 2006, 2012, 2017, 2021 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_ASSERT_H
#include <assert.h>
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#include "globals.h"
#include "diagnostics.h"
#include "machine_dep.h"
#include "scanaddrs.h"
#include "memmgr.h"
#define INRANGE(val,start,end)\
(start <= val && val < end)
static void CheckAddress(PolyWord *pt)
{
MemSpace *space = gMem.SpaceForAddress(pt-1);
if (space == 0)
{
Log("Check: Bad pointer %p (no space found)\n", pt);
ASSERT(space != 0);
}
if (space->spaceType == ST_STACK) // This may not have valid length words.
return;
PolyObject *obj = (PolyObject*)pt;
ASSERT(obj->ContainsNormalLengthWord());
POLYUNSIGNED length = obj->Length();
if (pt+length > space->top)
{
Log("Check: Bad pointer %p (space %p) length %" POLYUFMT "\n", pt, space, length);
ASSERT(pt+length <= space->top);
}
if (space->spaceType == ST_LOCAL)
{
LocalMemSpace *lSpace = (LocalMemSpace*)space;
if (!((pt > lSpace->bottom && pt+length <= lSpace->lowerAllocPtr) ||
(pt > lSpace->upperAllocPtr && pt+length <= space->top)))
{
Log("Check: Bad pointer %p (space %p) length %" POLYUFMT " outside allocated area\n", pt, space, length);
ASSERT((pt > lSpace->bottom && pt+length <= lSpace->lowerAllocPtr) ||
(pt > lSpace->upperAllocPtr && pt+length <= space->top));
}
}
}
void DoCheck (const PolyWord pt)
{
if (pt == PolyWord::FromUnsigned(0)) return;
if (pt.IsTagged()) return;
CheckAddress(pt.AsStackAddr());
}
class ScanCheckAddress: public ScanAddress
{
public:
virtual PolyObject *ScanObjectAddress(PolyObject *pt) { CheckAddress((PolyWord*)pt); return pt; }
};
void DoCheckObject (const PolyObject *base, POLYUNSIGNED L)
{
PolyWord *pt = (PolyWord*)base;
CheckAddress(pt);
MemSpace *space = gMem.SpaceForAddress(pt-1);
if (space == 0)
Crash ("Bad pointer 0x%08" PRIxPTR " found", (uintptr_t)pt);
ASSERT (OBJ_IS_LENGTH(L));
POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L);
if (n == 0) return;
ASSERT (n > 0);
ASSERT(pt-1 >= space->bottom && pt+n <= space->top);
byte flags = GetTypeBits(L); /* discards GC flag and mutable bit */
if (flags == F_BYTE_OBJ) /* possibly signed byte object */
return; /* Nothing more to do */
if (flags == F_CODE_OBJ) /* code object */
{
ScanCheckAddress checkAddr;
/* We flush the instruction cache here in case we change any of the
instructions when we update addresses. */
machineDependent->FlushInstructionCache(pt, (n + 1) * sizeof(PolyWord));
- machineDependent->ScanConstantsWithinCode((PolyObject *)base, (PolyObject *)base, n, &checkAddr);
+ machineDependent->ScanConstantsWithinCode((PolyObject *)base, n, &checkAddr);
/* Skip to the constants. */
- base->GetConstSegmentForCode(n, pt, n);
+ machineDependent->GetConstSegmentForCode((PolyObject*)base, n, pt, n);
}
else if (flags == F_CLOSURE_OBJ)
{
n -= sizeof(PolyObject*) / sizeof(PolyWord);
pt += sizeof(PolyObject*) / sizeof(PolyWord);
}
else ASSERT (flags == 0); /* ordinary word object */
while (n--) DoCheck (*pt++);
}
void DoCheckPointer (const PolyWord pt)
{
if (pt == PolyWord::FromUnsigned(0)) return;
if (OBJ_IS_AN_INTEGER(pt)) return;
DoCheck (pt);
if (pt.IsDataPtr())
{
PolyObject *obj = pt.AsObjPtr();
DoCheckObject (obj, obj->LengthWord());
}
}
// Check all the objects in the memory. Used to check the garbage collector
//
void DoCheckMemory()
{
ScanCheckAddress memCheck;
// Scan the local areas.
    for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
{
LocalMemSpace *space = *i;
memCheck.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr);
memCheck.ScanAddressesInRegion(space->upperAllocPtr, space->top);
}
// Scan the permanent mutable areas.
    for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
{
PermanentMemSpace *space = *i;
if (space->isMutable && ! space->byteOnly)
memCheck.ScanAddressesInRegion(space->bottom, space->top);
}
}
diff --git a/libpolyml/elfexport.cpp b/libpolyml/elfexport.cpp
index e16c7971..d19251a4 100644
--- a/libpolyml/elfexport.cpp
+++ b/libpolyml/elfexport.cpp
@@ -1,819 +1,834 @@
/*
Title: Write out a database as an ELF object file
Author: David Matthews.
- Copyright (c) 2006-7, 2011, 2016-18, 2020 David C. J. Matthews
+ Copyright (c) 2006-7, 2011, 2016-18, 2020-21 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_TIME_H
#include <time.h>
#endif
#ifdef HAVE_ASSERT_H
#include <assert.h>
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#ifdef HAVE_ELF_H
#include <elf.h>
#elif defined(HAVE_ELF_ABI_H)
#include <elf_abi.h>
#endif
#ifdef HAVE_MACHINE_RELOC_H
#include <machine/reloc.h>
#ifndef EM_X86_64
#define EM_X86_64 EM_AMD64
#endif
#if defined(HOSTARCHITECTURE_X86_64)
#ifndef R_386_PC32
#define R_386_PC32 R_X86_64_PC32
#endif
#ifndef R_386_32
#define R_386_32 R_X86_64_32
#endif
#ifndef R_X86_64_64
#define R_X86_64_64 R_X86_64_64
#endif
#endif /* HOSTARCHITECTURE_X86_64 */
#endif
// Solaris seems to put processor-specific constants in separate files
#ifdef HAVE_SYS_ELF_SPARC_H
#include <sys/elf_SPARC.h>
#endif
#ifdef HAVE_SYS_ELF_386_H
#include <sys/elf_386.h>
#endif
#ifdef HAVE_SYS_ELF_AMD64_H
#include <sys/elf_amd64.h>
#endif
// Android has the ARM relocation symbol here
#ifdef HAVE_ASM_ELF_H
#include <asm/elf.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_SYS_UTSNAME_H
#include <sys/utsname.h>
#endif
#include "globals.h"
#include "diagnostics.h"
#include "sys.h"
#include "machine_dep.h"
#include "gc.h"
#include "mpoly.h"
#include "scanaddrs.h"
#include "elfexport.h"
#include "run_time.h"
#include "version.h"
#include "polystring.h"
#include "timing.h"
+#include "memmgr.h"
+
#define sym_last_local_sym sym_data_section
#if defined(HOSTARCHITECTURE_X86)
# define HOST_E_MACHINE EM_386
# define HOST_DIRECT_DATA_RELOC R_386_32
# define HOST_DIRECT_FPTR_RELOC R_386_32
# define USE_RELA 0
#elif defined(HOSTARCHITECTURE_PPC)
# define HOST_E_MACHINE EM_PPC
# define HOST_DIRECT_DATA_RELOC R_PPC_ADDR32
# define HOST_DIRECT_FPTR_RELOC R_PPC_ADDR32
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_PPC64)
# define HOST_E_MACHINE EM_PPC64
# define HOST_DIRECT_DATA_RELOC R_PPC64_ADDR64
# define HOST_DIRECT_FPTR_RELOC R_PPC64_ADDR64
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_S390)
# define HOST_E_MACHINE EM_S390
# define HOST_DIRECT_DATA_RELOC R_390_32
# define HOST_DIRECT_FPTR_RELOC R_390_32
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_S390X)
# define HOST_E_MACHINE EM_S390
# define HOST_DIRECT_DATA_RELOC R_390_64
# define HOST_DIRECT_FPTR_RELOC R_390_64
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_SH)
# define HOST_E_MACHINE EM_SH
# define HOST_DIRECT_DATA_RELOC R_SH_DIR32
# define HOST_DIRECT_FPTR_RELOC R_SH_DIR32
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_SPARC)
# define HOST_E_MACHINE EM_SPARC
# define HOST_DIRECT_DATA_RELOC R_SPARC_32
# define HOST_DIRECT_FPTR_RELOC R_SPARC_32
# define USE_RELA 1
/* Sparc/Solaris, at least 2.8, requires ELF32_Rela relocations. For some reason,
though, it adds the value in the location being relocated (as with ELF32_Rel
relocations) as well as the addend. To be safe, whenever we use an ELF32_Rela
relocation we always zero the location to be relocated. */
#elif defined(HOSTARCHITECTURE_SPARC64)
# define HOST_E_MACHINE EM_SPARCV9
# define HOST_DIRECT_DATA_RELOC R_SPARC_64
# define HOST_DIRECT_FPTR_RELOC R_SPARC_64
/* Use the most relaxed memory model. At link time, the most restrictive one is
chosen, so it does no harm to be as permissive as possible here. */
# define HOST_E_FLAGS EF_SPARCV9_RMO
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_X86_64)
/* It seems Solaris/X86-64 only supports ELF64_Rela relocations. It appears that
Linux will support either so we now use Rela on X86-64. */
# define HOST_E_MACHINE EM_X86_64
# define HOST_DIRECT_DATA_RELOC R_X86_64_64
# define HOST_DIRECT_FPTR_RELOC R_X86_64_64
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_X32)
# define HOST_E_MACHINE EM_X86_64
# define HOST_DIRECT_DATA_RELOC R_X86_64_32
# define HOST_DIRECT_FPTR_RELOC R_X86_64_32
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_ARM)
# ifndef EF_ARM_EABI_VER4
# define EF_ARM_EABI_VER4 0x04000000
# endif
// When linking ARM binaries the linker checks the ABI version. We
// need to set the version to the same as the libraries.
// GCC currently uses version 4.
# define HOST_E_MACHINE EM_ARM
# define HOST_DIRECT_DATA_RELOC R_ARM_ABS32
# define HOST_DIRECT_FPTR_RELOC R_ARM_ABS32
# define USE_RELA 0
# define HOST_E_FLAGS EF_ARM_EABI_VER4
#elif defined(HOSTARCHITECTURE_HPPA)
# if defined(__hpux)
# define HOST_OSABI ELFOSABI_HPUX
# elif defined(__NetBSD__)
# define HOST_OSABI ELFOSABI_NETBSD
# elif defined(__linux__)
# define HOST_OSABI ELFOSABI_GNU
# endif
# define HOST_E_MACHINE EM_PARISC
# define HOST_DIRECT_DATA_RELOC R_PARISC_DIR32
# define HOST_DIRECT_FPTR_RELOC R_PARISC_PLABEL32
# define HOST_E_FLAGS EFA_PARISC_1_0
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_IA64)
# define HOST_E_MACHINE EM_IA_64
# define HOST_DIRECT_DATA_RELOC R_IA64_DIR64LSB
# define HOST_DIRECT_FPTR_RELOC R_IA64_FPTR64LSB
# define HOST_E_FLAGS EF_IA_64_ABI64
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_AARCH64)
# define HOST_E_MACHINE EM_AARCH64
# define HOST_DIRECT_DATA_RELOC R_AARCH64_ABS64
# define HOST_DIRECT_FPTR_RELOC R_AARCH64_ABS64
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_M68K)
# define HOST_E_MACHINE EM_68K
# define HOST_DIRECT_DATA_RELOC R_68K_32
# define HOST_DIRECT_FPTR_RELOC R_68K_32
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_MIPS)
# define HOST_E_MACHINE EM_MIPS
# define HOST_DIRECT_DATA_RELOC R_MIPS_32
# define HOST_DIRECT_FPTR_RELOC R_MIPS_32
# ifdef __PIC__
# define HOST_E_FLAGS EF_MIPS_CPIC
# endif
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_MIPS64)
# define HOST_E_MACHINE EM_MIPS
# define HOST_DIRECT_DATA_RELOC R_MIPS_64
# define HOST_DIRECT_FPTR_RELOC R_MIPS_64
# ifdef __PIC__
# define HOST_E_FLAGS (EF_MIPS_ARCH_64 | EF_MIPS_CPIC)
# else
# define HOST_E_FLAGS EF_MIPS_ARCH_64
# endif
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_ALPHA)
# define HOST_E_MACHINE EM_ALPHA
# define HOST_DIRECT_DATA_RELOC R_ALPHA_REFQUAD
# define HOST_DIRECT_FPTR_RELOC R_ALPHA_REFQUAD
# define USE_RELA 1
#elif defined(HOSTARCHITECTURE_RISCV32) || defined(HOSTARCHITECTURE_RISCV64)
# define HOST_E_MACHINE EM_RISCV
# if defined(HOSTARCHITECTURE_RISCV32)
# define HOST_DIRECT_DATA_RELOC R_RISCV_32
# define HOST_DIRECT_FPTR_RELOC R_RISCV_32
# else
# define HOST_DIRECT_DATA_RELOC R_RISCV_64
# define HOST_DIRECT_FPTR_RELOC R_RISCV_64
# endif
# if defined(__riscv_float_abi_soft)
# define HOST_E_FLAGS_FLOAT_ABI EF_RISCV_FLOAT_ABI_SOFT
# elif defined(__riscv_float_abi_single)
# define HOST_E_FLAGS_FLOAT_ABI EF_RISCV_FLOAT_ABI_SINGLE
# elif defined(__riscv_float_abi_double)
# define HOST_E_FLAGS_FLOAT_ABI EF_RISCV_FLOAT_ABI_DOUBLE
# elif defined(__riscv_float_abi_quad)
# define HOST_E_FLAGS_FLOAT_ABI EF_RISCV_FLOAT_ABI_QUAD
# else
# error "Unknown RISC-V float ABI"
# endif
# ifdef __riscv_32e
# define HOST_E_FLAGS_RVE __riscv_32e
# else
# define HOST_E_FLAGS_RVE 0
# endif
# define HOST_E_FLAGS (HOST_E_FLAGS_FLOAT_ABI | HOST_E_FLAGS_RVE)
# define USE_RELA 1
#else
# error "No support for exporting on this architecture"
#endif
// The first two symbol-table slots are reserved:
//   index 0 is always the null symbol in ELF
//   index 1 is used for the data section
#define EXTRA_SYMBOLS 2
// Map a memory-table area index to the index of its symbol-table entry.
static unsigned AreaToSym(unsigned area)
{
    return EXTRA_SYMBOLS + area;
}
// Section table entries
// Fixed indices of the leading entries in the section header table.
// The per-area data/relocation sections follow sect_data and the symbol
// table index is computed at run time (see sect_symtab in exportStore).
enum {
sect_initial = 0, // Reserved null section - index 0 is always SHT_NULL in ELF
sect_sectionnametable, // .shstrtab - the section name string table
sect_stringtable, // .strtab - the symbol name string table
// Data and relocation entries come in here.
sect_data
// Finally the symbol table
};
// Add an external reference to the RTS
// Records "name" in the external-reference string table and writes a
// relocation at relocAddr against the next external symbol index.
// The symbol records themselves are written later, in exportStore.
void ELFExport::addExternalReference(void *relocAddr, const char *name, bool isFuncPtr)
{
externTable.makeEntry(name);
// The symbol is added after the memory table entries and poly_exports
writeRelocation(0, relocAddr, symbolNum++, isFuncPtr);
}
// Generate the address relative to the start of the segment.
void ELFExport::setRelocationAddress(void *p, ElfXX_Addr *reloc)
{
unsigned area = findArea(p);
POLYUNSIGNED offset = (char*)p - (char*)memTable[area].mtOriginalAddr;
*reloc = offset;
}
/* Get the index corresponding to an address. */
// Writes a relocation for the word at relocAddr referring to the value p.
// The relocation is made against the symbol of the area containing p and
// the addend is p's byte offset within that area.
PolyWord ELFExport::createRelocation(PolyWord p, void *relocAddr)
{
    void *target = p.AsAddress();
    unsigned targetArea = findArea(target);
    char *areaBase = (char*)memTable[targetArea].mtOriginalAddr;
    POLYUNSIGNED targetOffset = (char*)target - areaBase;
    return writeRelocation(targetOffset, relocAddr, AreaToSym(targetArea), false);
}
// Write one relocation record to the export file.
// "offset" is the addend: the target's offset within its symbol's area.
// With Rela relocations the addend lives in the record and zero is
// returned; with Rel relocations it must be stored in the relocated word,
// so it is returned to the caller.
PolyWord ELFExport::writeRelocation(POLYUNSIGNED offset, void *relocAddr, unsigned symbolNum, bool isFuncPtr)
{
#if USE_RELA
ElfXX_Rela reloc;
reloc.r_addend = offset;
offset = 0; // The addend is in the record so nothing is left for the word.
#else
ElfXX_Rel reloc;
#endif
// Set the offset within the section we're scanning.
setRelocationAddress(relocAddr, &reloc.r_offset);
#ifdef HOSTARCHITECTURE_MIPS64
// MIPS N64 uses an unpacked relocation layout instead of packed r_info.
reloc.r_sym = symbolNum;
reloc.r_ssym = 0;
reloc.r_type = isFuncPtr ? HOST_DIRECT_FPTR_RELOC : HOST_DIRECT_DATA_RELOC;
reloc.r_type2 = 0;
reloc.r_type3 = 0;
#else
reloc.r_info = ELFXX_R_INFO(symbolNum, isFuncPtr ? HOST_DIRECT_FPTR_RELOC : HOST_DIRECT_DATA_RELOC);
#endif
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
return PolyWord::FromUnsigned(offset);
}
/* This is called for each constant within the code.
Print a relocation entry for the word and return a value that means
that the offset is saved in original word. */
-void ELFExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code)
+void ELFExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code, intptr_t displacement)
{
#ifndef POLYML32IN64
- PolyObject *p = GetConstantValue(addr, code);
+ PolyObject *p = GetConstantValue(addr, code, displacement);
if (p == 0)
return;
void *a = p;
unsigned aArea = findArea(a);
// We don't need a relocation if this is relative to the current segment
// since the relative address will already be right.
if (code == PROCESS_RELOC_I386RELATIVE && aArea == findArea(addr))
return;
// Set the value at the address to the offset relative to the symbol.
POLYUNSIGNED offset = (char*)a - (char*)memTable[aArea].mtOriginalAddr;
switch (code)
{
case PROCESS_RELOC_DIRECT: // 32 or 64 bit address of target
{
PolyWord r = createRelocation(p, addr);
POLYUNSIGNED w = r.AsUnsigned();
for (unsigned i = 0; i < sizeof(PolyWord); i++)
{
addr[i] = (byte)(w & 0xff);
w >>= 8;
}
}
break;
#if(defined(HOSTARCHITECTURE_X86) || defined(HOSTARCHITECTURE_X86_64) || \
defined(HOSTARCHITECTURE_X32))
#ifdef HOSTARCHITECTURE_X86
#define R_PC_RELATIVE R_386_PC32
#else
#define R_PC_RELATIVE R_X86_64_PC32
#endif
case PROCESS_RELOC_I386RELATIVE: // 32 bit relative address
{
+ // We seem to need to subtract 4 bytes to get the correct offset in ELF
+ offset -= 4;
#if USE_RELA
ElfXX_Rela reloc;
reloc.r_addend = offset;
#else
ElfXX_Rel reloc;
#endif
setRelocationAddress(addr, &reloc.r_offset);
- // We seem to need to subtract 4 bytes to get the correct offset in ELF
- offset -= 4;
reloc.r_info = ELFXX_R_INFO(AreaToSym(aArea), R_PC_RELATIVE);
+ byte *writAble = gMem.SpaceForAddress(addr)->writeAble(addr);
#if USE_RELA
// Clear the field. Even though it's not supposed to be used with Rela the
// Linux linker at least seems to add the value in here sometimes.
- memset(addr, 0, 4);
+ memset(writAble, 0, 4);
#else
for (unsigned i = 0; i < 4; i++)
{
- addr[i] = (byte)(offset & 0xff);
+ writAble[i] = (byte)(offset & 0xff);
offset >>= 8;
}
#endif
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
}
break;
#endif
default:
ASSERT(0); // Wrong type of relocation for this architecture.
}
#endif
}
// Add "str" to the given string table and return its offset.
// Null or empty strings map to the reserved first (null) entry at offset 0.
unsigned long ELFExport::makeStringTableEntry(const char *str, ExportStringTable *stab)
{
    if (str == NULL || str[0] == 0)
        return 0; // First entry is the null string.
    return stab->makeEntry(str);
}
// Write a single ElfXX_Sym record to the export file.
// "value"/"size" become st_value/st_size, "binding" is an STB_* value,
// "sttype" an STT_* value and "section" the section header index
// (or SHN_UNDEF for external references).
void ELFExport::writeSymbol(const char *symbolName, long value, long size, int binding, int sttype, int section)
{
ElfXX_Sym symbol;
memset(&symbol, 0, sizeof(symbol)); // Zero unused fields
symbol.st_name = makeStringTableEntry(symbolName, &symStrings);
symbol.st_value = value;
symbol.st_size = size;
symbol.st_info = ELFXX_ST_INFO(binding, sttype);
symbol.st_other = 0;
symbol.st_shndx = section;
fwrite(&symbol, sizeof(symbol), 1, exportFile);
}
// Set the file alignment.
// Pads the export file with zero bytes so that the current position is a
// multiple of "align".  Alignments used by callers are small (<= 8 bytes);
// anything larger than the pad buffer is ignored rather than reading past
// the end of "pad" (the original passed an unchecked size to fwrite).
void ELFExport::alignFile(int align)
{
    char pad[32] = {0}; // Maximum supported alignment
    // ftell returns long: keep the full value rather than truncating to int.
    long offset = ftell(exportFile);
    if (offset < 0 || align <= 0 || (size_t)align > sizeof(pad))
        return; // Invalid position or unsupported alignment: nothing to do.
    if ((offset % align) == 0) return;
    fwrite(&pad, align - (offset % align), 1, exportFile);
}
// Write a relocation for a field of the export structures ("poly_exports"
// and the memory table).  Unlike writeRelocation the offset of the field
// within its section is supplied directly, because these structures are
// not inside any of the exported memory areas.
void ELFExport::createStructsRelocation(unsigned sym, size_t offset, size_t addend)
{
#if USE_RELA
ElfXX_Rela reloc;
reloc.r_addend = addend;
#else
ElfXX_Rel reloc;
#endif
reloc.r_offset = offset;
#ifdef HOSTARCHITECTURE_MIPS64
// MIPS N64 uses an unpacked relocation layout instead of packed r_info.
reloc.r_sym = sym;
reloc.r_ssym = 0;
reloc.r_type = HOST_DIRECT_DATA_RELOC;
reloc.r_type2 = 0;
reloc.r_type3 = 0;
#else
reloc.r_info = ELFXX_R_INFO(sym, HOST_DIRECT_DATA_RELOC);
#endif
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
}
void ELFExport::exportStore(void)
{
PolyWord *p;
ElfXX_Ehdr fhdr;
ElfXX_Shdr *sections = 0;
#ifdef __linux__
unsigned extraSections = 1; // Extra section for .note.GNU-stack
#else
unsigned extraSections = 0;
#endif
unsigned numSections = 0;
for (unsigned j = 0; j < memTableEntries; j++)
{
if ((memTable[j].mtFlags & (MTF_BYTES|MTF_WRITEABLE)) == MTF_BYTES)
numSections += 1;
else numSections += 2;
}
// The symbol table comes at the end.
unsigned sect_symtab = sect_data + numSections + 2;
numSections += 6 + extraSections;
// External symbols start after the memory table entries and "poly_exports".
symbolNum = EXTRA_SYMBOLS+memTableEntries+1;
// Both the string tables have an initial null entry.
symStrings.makeEntry("");
sectionStrings.makeEntry("");
// Write out initial values for the headers. These are overwritten at the end.
// File header
memset(&fhdr, 0, sizeof(fhdr));
fhdr.e_ident[EI_MAG0] = 0x7f;
fhdr.e_ident[EI_MAG1] = 'E';
fhdr.e_ident[EI_MAG2] = 'L';
fhdr.e_ident[EI_MAG3] = 'F';
fhdr.e_ident[EI_CLASS] = ELFCLASSXX; // ELFCLASS32 or ELFCLASS64
fhdr.e_ident[EI_VERSION] = EV_CURRENT;
#ifdef HOST_OSABI
fhdr.e_ident[EI_OSABI] = HOST_OSABI;
#endif
{
union { unsigned long wrd; char chrs[sizeof(unsigned long)]; } endian;
endian.wrd = 1;
if (endian.chrs[0] == 0)
fhdr.e_ident[EI_DATA] = ELFDATA2MSB; // Big endian
else
fhdr.e_ident[EI_DATA] = ELFDATA2LSB; // Little endian
}
fhdr.e_type = ET_REL;
// The machine needs to match the machine we're compiling for
// even if this is actually portable code.
fhdr.e_machine = HOST_E_MACHINE;
#ifdef HOST_E_FLAGS
fhdr.e_flags = HOST_E_FLAGS;
#endif
fhdr.e_version = EV_CURRENT;
fhdr.e_shoff = sizeof(fhdr); // Offset to section header - immediately follows
fhdr.e_ehsize = sizeof(fhdr);
fhdr.e_shentsize = sizeof(ElfXX_Shdr);
fhdr.e_shnum = numSections;
fhdr.e_shstrndx = sect_sectionnametable; // Section name table section index;
fwrite(&fhdr, sizeof(fhdr), 1, exportFile); // Write it for the moment.
sections = new ElfXX_Shdr[numSections];
memset(sections, 0, sizeof(ElfXX_Shdr) * numSections); // Necessary?
// Set up the section header but don't write it yet.
// Section 0 - all zeros
sections[sect_initial].sh_type = SHT_NULL;
sections[sect_initial].sh_link = SHN_UNDEF;
// Section name table.
sections[sect_sectionnametable].sh_name = makeStringTableEntry(".shstrtab", §ionStrings);
sections[sect_sectionnametable].sh_type = SHT_STRTAB;
sections[sect_sectionnametable].sh_addralign = sizeof(char);
// sections[sect_sectionnametable].sh_offset is set later
// sections[sect_sectionnametable].sh_size is set later
// Symbol name table.
sections[sect_stringtable].sh_name = makeStringTableEntry(".strtab", §ionStrings);
sections[sect_stringtable].sh_type = SHT_STRTAB;
sections[sect_stringtable].sh_addralign = sizeof(char);
// sections[sect_stringtable].sh_offset is set later
// sections[sect_stringtable].sh_size is set later
unsigned long dataName = makeStringTableEntry(".data", §ionStrings);
unsigned long dataRelName = makeStringTableEntry(USE_RELA ? ".rela.data" : ".rel.data", §ionStrings);
#ifndef CODEISNOTEXECUTABLE
unsigned long textName = makeStringTableEntry(".text", §ionStrings);
unsigned long textRelName = makeStringTableEntry(USE_RELA ? ".rela.text" : ".rel.text", §ionStrings);
#endif
// The Linux linker does not like relocations in the .rodata section and marks the executable
// as containing text relocations. Putting the data in a .data.rel.ro section seems to work.
unsigned long relDataName = makeStringTableEntry(".data.rel.ro", §ionStrings);
unsigned long relDataRelName = makeStringTableEntry(USE_RELA ? ".rela.data.rel.ro" : ".rel.data.rel.ro", §ionStrings);
// Byte and other leaf data that do not require relocation can go in the .rodata section
unsigned long nRelDataName = makeStringTableEntry(".rodata", §ionStrings);
// Main data sections. Each one has a relocation section.
unsigned s = sect_data;
for (unsigned i=0; i < memTableEntries; i++)
{
sections[s].sh_addralign = 8; // 8-byte alignment
sections[s].sh_type = SHT_PROGBITS;
if (memTable[i].mtFlags & MTF_WRITEABLE)
{
// Mutable areas
ASSERT(!(memTable[i].mtFlags & MTF_EXECUTABLE)); // Executable areas can't be writable.
sections[s].sh_name = dataName;
sections[s].sh_flags = SHF_WRITE | SHF_ALLOC;
s++;
// Mutable byte areas can contain external references so need relocation
sections[s].sh_name = dataRelName; // Name of relocation section
}
#ifndef CODEISNOTEXECUTABLE
// Not if we're building the interpreted version.
else if (memTable[i].mtFlags & MTF_EXECUTABLE)
{
// Code areas are marked as executable.
sections[s].sh_name = textName;
sections[s].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
s++;
sections[s].sh_name = textRelName; // Name of relocation section
}
#endif
else if (memTable[i].mtFlags & MTF_BYTES)
{
// Data that does not require relocation.
// Non-code immutable areas
sections[s].sh_name = nRelDataName;
sections[s].sh_flags = SHF_ALLOC;
s++;
continue; // Skip the relocation section for this
}
else
{
// Non-code immutable areas
sections[s].sh_name = relDataName;
// The .data.rel.ro has to be writable in order to be relocated.
// It is set to read-only after relocation.
sections[s].sh_flags = SHF_WRITE | SHF_ALLOC;
s++;
sections[s].sh_name = relDataRelName; // Name of relocation section
}
// sections[s].sh_size is set later
// sections[s].sh_offset is set later.
// sections[s].sh_size is set later.
// Relocation section
sections[s].sh_type = USE_RELA ? SHT_RELA : SHT_REL; // Contains relocation with/out explicit addends (ElfXX_Rel)
sections[s].sh_link = sect_symtab; // Index to symbol table
sections[s].sh_info = s-1; // Applies to the data section
sections[s].sh_addralign = sizeof(long); // Align to a word
sections[s].sh_entsize = USE_RELA ? sizeof(ElfXX_Rela) : sizeof(ElfXX_Rel);
s++;
// sections[s+1].sh_offset is set later.
// sections[s+1].sh_size is set later.
}
// Table data - Poly tables that describe the memory layout.
unsigned sect_table_data = s;
sections[sect_table_data].sh_name = dataName;
sections[sect_table_data].sh_type = SHT_PROGBITS;
sections[sect_table_data].sh_flags = SHF_WRITE | SHF_ALLOC;
sections[sect_table_data].sh_addralign = 8; // 8-byte alignment
// Table relocation
sections[sect_table_data+1].sh_name = dataRelName;
sections[sect_table_data+1].sh_type = USE_RELA ? SHT_RELA : SHT_REL; // Contains relocation with/out explicit addends (ElfXX_Rel)
sections[sect_table_data+1].sh_link = sect_symtab; // Index to symbol table
sections[sect_table_data+1].sh_info = sect_table_data; // Applies to table section
sections[sect_table_data+1].sh_addralign = sizeof(long); // Align to a word
sections[sect_table_data+1].sh_entsize = USE_RELA ? sizeof(ElfXX_Rela) : sizeof(ElfXX_Rel);
// Symbol table.
sections[sect_symtab].sh_name = makeStringTableEntry(".symtab", §ionStrings);
sections[sect_symtab].sh_type = SHT_SYMTAB;
sections[sect_symtab].sh_link = sect_stringtable; // String table to use
sections[sect_symtab].sh_addralign = sizeof(long); // Align to a word
sections[sect_symtab].sh_entsize = sizeof(ElfXX_Sym);
// sections[sect_symtab].sh_info is set later
// sections[sect_symtab].sh_size is set later
// sections[sect_symtab].sh_offset is set later
#ifdef __linux__
// Add a .note.GNU-stack section to indicate this does not require executable stack
sections[numSections-1].sh_name = makeStringTableEntry(".note.GNU-stack", §ionStrings);
sections[numSections - 1].sh_type = SHT_PROGBITS;
#endif
// Write the relocations.
unsigned relocSection = sect_data;
for (unsigned i = 0; i < memTableEntries; i++)
{
relocSection++;
if ((memTable[i].mtFlags & (MTF_BYTES|MTF_WRITEABLE)) == MTF_BYTES)
continue;
alignFile(sections[relocSection].sh_addralign);
sections[relocSection].sh_offset = ftell(exportFile);
relocationCount = 0;
// Create the relocation table and turn all addresses into offsets.
char *start = (char*)memTable[i].mtOriginalAddr;
char *end = start + memTable[i].mtLength;
for (p = (PolyWord*)start; p < (PolyWord*)end; )
{
p++;
PolyObject *obj = (PolyObject*)p;
POLYUNSIGNED length = obj->Length();
- // Update any constants before processing the object
- // We need that for relative jumps/calls in X86/64.
if (length != 0 && obj->IsCodeObject())
+ {
+ POLYUNSIGNED constCount;
+ PolyWord* cp;
+ // Get the constant area pointer first because ScanConstantsWithinCode
+ // may alter it.
+ machineDependent->GetConstSegmentForCode(obj, cp, constCount);
+ // Update any constants before processing the object
+ // We need that for relative jumps/calls in X86/64.
machineDependent->ScanConstantsWithinCode(obj, this);
- relocateObject(obj);
+ if (cp > (PolyWord*)obj && cp < ((PolyWord*)obj) + length)
+ {
+ // Process the constants if they're in the area but not if they've been moved.
+ for (POLYUNSIGNED i = 0; i < constCount; i++) relocateValue(&(cp[i]));
+ }
+ }
+ else relocateObject(obj);
p += length;
}
sections[relocSection].sh_size =
relocationCount * (USE_RELA ? sizeof(ElfXX_Rela) : sizeof(ElfXX_Rel));
relocSection++;
}
// Relocations for "exports" and "memTable";
alignFile(sections[sect_table_data+1].sh_addralign);
sections[sect_table_data+1].sh_offset = ftell(exportFile);
relocationCount = 0;
// TODO: This won't be needed if we put these in a separate section.
POLYUNSIGNED areaSpace = 0;
for (unsigned i = 0; i < memTableEntries; i++)
areaSpace += memTable[i].mtLength;
// Address of "memTable" within "exports". We can't use createRelocation because
// the position of the relocation is not in either the mutable or the immutable area.
size_t memTableOffset = sizeof(exportDescription); // It follows immediately after this.
createStructsRelocation(AreaToSym(memTableEntries), offsetof(exportDescription, memTable), memTableOffset);
// Address of "rootFunction" within "exports"
unsigned rootAddrArea = findArea(rootFunction);
size_t rootOffset = (char*)rootFunction - (char*)memTable[rootAddrArea].mtOriginalAddr;
createStructsRelocation(AreaToSym(rootAddrArea), offsetof(exportDescription, rootFunction), rootOffset);
// Addresses of the areas within memtable.
for (unsigned i = 0; i < memTableEntries; i++)
{
createStructsRelocation(AreaToSym(i),
sizeof(exportDescription) + i * sizeof(memoryTableEntry) + offsetof(memoryTableEntry, mtCurrentAddr),
0 /* No offset relative to base symbol*/);
}
sections[sect_table_data+1].sh_size =
relocationCount * (USE_RELA ? sizeof(ElfXX_Rela) : sizeof(ElfXX_Rel));
// Now the symbol table.
alignFile(sections[sect_symtab].sh_addralign);
sections[sect_symtab].sh_offset = ftell(exportFile);
writeSymbol("", 0, 0, 0, 0, 0); // Initial symbol
// Write the local symbols first.
writeSymbol("", 0, 0, STB_LOCAL, STT_SECTION, sect_data); // .data section
// Create symbols for the address areas. AreaToSym assumes these come first.
s = sect_data;
for (unsigned i = 0; i < memTableEntries; i++)
{
char buff[50];
sprintf(buff, "area%1u", i);
writeSymbol(buff, 0, 0, STB_LOCAL, STT_OBJECT, s);
if ((memTable[i].mtFlags & (MTF_BYTES|MTF_WRITEABLE)) == MTF_BYTES)
s += 1;
else s += 2;
}
// Global symbols - Exported symbol for table.
writeSymbol("poly_exports", 0,
sizeof(exportDescription)+sizeof(memoryTableEntry)*memTableEntries,
STB_GLOBAL, STT_OBJECT, sect_table_data);
// External references
for (unsigned i = 0; i < externTable.stringSize; i += (unsigned)strlen(externTable.strings+i) + 1)
writeSymbol(externTable.strings+i, 0, 0, STB_GLOBAL, STT_FUNC, SHN_UNDEF);
sections[sect_symtab].sh_info = EXTRA_SYMBOLS+memTableEntries; // One more than last local sym
sections[sect_symtab].sh_size = sizeof(ElfXX_Sym) * symbolNum;
// Now the binary data.
unsigned dataSection = sect_data;
for (unsigned i = 0; i < memTableEntries; i++)
{
sections[dataSection].sh_size = memTable[i].mtLength;
alignFile(sections[dataSection].sh_addralign);
sections[dataSection].sh_offset = ftell(exportFile);
fwrite(memTable[i].mtOriginalAddr, 1, memTable[i].mtLength, exportFile);
if ((memTable[i].mtFlags & (MTF_BYTES|MTF_WRITEABLE)) == MTF_BYTES)
dataSection += 1;
else dataSection += 2;
}
exportDescription exports;
memset(&exports, 0, sizeof(exports));
exports.structLength = sizeof(exportDescription);
exports.memTableSize = sizeof(memoryTableEntry);
exports.memTableEntries = memTableEntries;
exports.memTable = USE_RELA ? 0 : (memoryTableEntry *)memTableOffset;
// Set the value to be the offset relative to the base of the area. We have set a relocation
// already which will add the base of the area.
exports.rootFunction = USE_RELA ? 0 : (void*)rootOffset;
exports.timeStamp = getBuildTime();
exports.architecture = machineDependent->MachineArchitecture();
exports.rtsVersion = POLY_version_number;
#ifdef POLYML32IN64
exports.originalBaseAddr = globalHeapBase;
#else
exports.originalBaseAddr = 0;
#endif
// Set the address values to zero before we write. They will always
// be relative to their base symbol.
for (unsigned i = 0; i < memTableEntries; i++)
memTable[i].mtCurrentAddr = 0;
// Now the binary data.
alignFile(sections[sect_table_data].sh_addralign);
sections[sect_table_data].sh_offset = ftell(exportFile);
sections[sect_table_data].sh_size = sizeof(exportDescription) + memTableEntries*sizeof(memoryTableEntry);
fwrite(&exports, sizeof(exports), 1, exportFile);
fwrite(memTable, sizeof(memoryTableEntry), memTableEntries, exportFile);
// The section name table
sections[sect_sectionnametable].sh_offset = ftell(exportFile);
fwrite(sectionStrings.strings, sectionStrings.stringSize, 1, exportFile);
sections[sect_sectionnametable].sh_size = sectionStrings.stringSize;
// The symbol name table
sections[sect_stringtable].sh_offset = ftell(exportFile);
fwrite(symStrings.strings, symStrings.stringSize, 1, exportFile);
sections[sect_stringtable].sh_size = symStrings.stringSize;
// Finally the section headers.
alignFile(4);
fhdr.e_shoff = ftell(exportFile);
fwrite(sections, sizeof(ElfXX_Shdr) * numSections, 1, exportFile);
// Rewind to rewrite the file header with the offset of the section headers.
rewind(exportFile);
fwrite(&fhdr, sizeof(fhdr), 1, exportFile);
fclose(exportFile); exportFile = NULL;
delete[]sections;
}
diff --git a/libpolyml/elfexport.h b/libpolyml/elfexport.h
index 168a1447..93c39604 100644
--- a/libpolyml/elfexport.h
+++ b/libpolyml/elfexport.h
@@ -1,128 +1,132 @@
/*
Title: Export memory as an ELF object file
Author: David C. J. Matthews.
Copyright (c) 2006, 2016-17, 2020 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef ELFExport_H_INCLUDED
#define ELFExport_H_INCLUDED
#include "config.h"
#include "scanaddrs.h" // For base class
#include "exporter.h"
#ifdef HAVE_ELF_H
#include <elf.h>
#endif
#ifdef HAVE_ELF_ABI_H
#include <elf_abi.h>
#endif
// Select 32 or 64 bit version depending on the word length
#if (SIZEOF_VOIDP == 8)
#define ElfXX_Addr Elf64_Addr
#define ElfXX_Rel Elf64_Rel
#define ElfXX_Rela Elf64_Rela
#define ElfXX_Sym Elf64_Sym
#define ElfXX_Ehdr Elf64_Ehdr
#define ElfXX_Shdr Elf64_Shdr
// Include a cast on the next line. Linux includes this anyway but it's needed for FreeBSD.
#define ELFXX_R_INFO(_y, _z) ELF64_R_INFO((Elf64_Xword)(_y), _z)
#define ELFXX_ST_INFO(_y, _z) ELF64_ST_INFO(_y, _z)
#define ELFCLASSXX ELFCLASS64
#else
#define ElfXX_Addr Elf32_Addr
#define ElfXX_Rel Elf32_Rel
#define ElfXX_Rela Elf32_Rela
#define ElfXX_Sym Elf32_Sym
#define ElfXX_Ehdr Elf32_Ehdr
#define ElfXX_Shdr Elf32_Shdr
#define ELFXX_R_INFO(_y, _z) ELF32_R_INFO(_y, _z)
#define ELFXX_ST_INFO(_y, _z) ELF32_ST_INFO(_y, _z)
#define ELFCLASSXX ELFCLASS32
#endif
#ifdef HOSTARCHITECTURE_MIPS64
/* MIPS N64 ABI has a different Elf64_Rel/Rela layout */
// 64-bit MIPS relocation record without an addend (REL form).
// The N64 ABI splits the packed r_info field into separate symbol and
// type fields, allowing up to three composed relocation types per record.
typedef struct
{
Elf64_Addr r_offset; /* Address */
Elf64_Word r_sym; /* Symbol index */
unsigned char r_ssym; /* Special symbol */
unsigned char r_type3; /* Third relocation type */
unsigned char r_type2; /* Second relocation type */
unsigned char r_type; /* First relocation type */
} Elf64_Mips_Rel;
// 64-bit MIPS relocation record with an explicit addend (RELA form).
typedef struct
{
Elf64_Addr r_offset; /* Address */
Elf64_Word r_sym; /* Symbol index */
unsigned char r_ssym; /* Special symbol */
unsigned char r_type3; /* Third relocation type */
unsigned char r_type2; /* Second relocation type */
unsigned char r_type; /* First relocation type */
Elf64_Sxword r_addend; /* Addend */
} Elf64_Mips_Rela;
#undef ElfXX_Rel
#define ElfXX_Rel Elf64_Mips_Rel
#undef ElfXX_Rela
#define ElfXX_Rela Elf64_Mips_Rela
/* Elf64_Mips_Rel/Rela has no r_info, so this macro is meaningless */
#undef ELFXX_R_INFO
#endif
class TaskData;
class ELFExport: public Exporter, public ScanAddress
{
public:
ELFExport(): relocationCount(0), symbolNum(0) {}
public:
virtual void exportStore(void);
private:
// ScanAddress overrides
- virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code);
+ virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code, intptr_t displacement);
// At the moment we should only get calls to ScanConstant.
virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
virtual void addExternalReference(void *addr, const char *name, bool isFuncPtr);
+ virtual void RelocateOnly(PolyObject* base, byte* addressOfConstant, ScanRelocationKind code)
+ {
+ ScanConstant(base, addressOfConstant, code, 0);
+ }
private:
void setRelocationAddress(void *p, ElfXX_Addr *reloc);
PolyWord createRelocation(PolyWord p, void *relocAddr);
PolyWord writeRelocation(POLYUNSIGNED offset, void *relocAddr, unsigned symbolNum, bool isFuncPtr);
void writeSymbol(const char *symbolName, long value, long size, int binding, int sttype, int section);
unsigned long makeStringTableEntry(const char *str, ExportStringTable *stab);
void alignFile(int align);
void createStructsRelocation(unsigned area, size_t offset, size_t addend);
unsigned relocationCount;
// There are two tables - one is used for section names, the other for symbol names.
ExportStringTable symStrings, sectionStrings;
// Table and count for external references.
ExportStringTable externTable;
unsigned symbolNum;
};
#endif
diff --git a/libpolyml/exporter.cpp b/libpolyml/exporter.cpp
index 96a1f056..de982c5b 100644
--- a/libpolyml/exporter.cpp
+++ b/libpolyml/exporter.cpp
@@ -1,926 +1,989 @@
/*
Title: exporter.cpp - Export a function as an object or C file
Copyright (c) 2006-7, 2015, 2016-21 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_ASSERT_H
#include <assert.h>
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#if (defined(_WIN32))
#include <tchar.h>
#else
#define _T(x) x
#define _tcslen strlen
#define _tcscmp strcmp
#define _tcscat strcat
#endif
#include "exporter.h"
#include "save_vec.h"
#include "polystring.h"
#include "run_time.h"
#include "osmem.h"
#include "scanaddrs.h"
#include "gc.h"
#include "machine_dep.h"
#include "diagnostics.h"
#include "memmgr.h"
#include "processes.h" // For IO_SPACING
#include "sys.h" // For EXC_Fail
#include "rtsentry.h"
#include "pexport.h"
#ifdef HAVE_PECOFF
#include "pecoffexport.h"
#elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H)
#include "elfexport.h"
#elif defined(HAVE_MACH_O_RELOC_H)
#include "machoexport.h"
#endif
#if (defined(_WIN32))
#define NOMEMORY ERROR_NOT_ENOUGH_MEMORY
#define ERRORNUMBER _doserrno
#else
#define NOMEMORY ENOMEM
#define ERRORNUMBER errno
#endif
extern "C" {
POLYEXTERNALSYMBOL POLYUNSIGNED PolyExport(FirstArgument threadId, PolyWord fileName, PolyWord root);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyExportPortable(FirstArgument threadId, PolyWord fileName, PolyWord root);
}
/*
To export the function and everything reachable from it we need to copy
all the objects into a new area. We leave tombstones in the original
objects by overwriting the length word. That prevents us from copying an
object twice and breaks loops. Once we've copied the objects we then
have to go back over the memory and turn the tombstones back into length
words.
*/
// Free the tombstone array; it was allocated with calloc in
// CopyScan::initialise so free() is the matching deallocator.
GraveYard::~GraveYard()
{
free(graves);
}
// Used to calculate the space required for the ordinary mutables
// and the no-overwrite mutables. They are interspersed in local space.
class MutSizes : public ScanAddress
{
public:
MutSizes() : mutSize(0), noOverSize(0) {}
virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }// Not actually used
// Add each object's size (including its length word) to the accumulator
// selected by the object's no-overwrite bit.
virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord)
{
const POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord) + 1; // Include length word
if (OBJ_IS_NO_OVERWRITE(lengthWord))
noOverSize += words;
else mutSize += words;
}
POLYUNSIGNED mutSize, noOverSize; // Running totals, in words
};
// The constructor only zeroes the fields; initialise() does the real
// set-up (sizing and graveyard allocation).
CopyScan::CopyScan(unsigned h/*=0*/): hierarchy(h)
{
defaultImmSize = defaultMutSize = defaultCodeSize = defaultNoOverSize = 0;
tombs = 0;
graveYard = 0;
}
void CopyScan::initialise(bool isExport/*=true*/)
{
ASSERT(gMem.eSpaces.size() == 0);
// Set the space sizes to a proportion of the space currently in use.
// Computing these sizes is not obvious because CopyScan is used both
// for export and for saved states. For saved states in particular we
// want to use a smaller size because they are retained after we save
// the state and if we have many child saved states it's important not
// to waste memory.
if (hierarchy == 0)
{
graveYard = new GraveYard[gMem.pSpaces.size()];
if (graveYard == 0)
{
if (debugOptions & DEBUG_SAVING)
Log("SAVE: Unable to allocate graveyard, size: %lu.\n", gMem.pSpaces.size());
throw MemoryException();
}
}
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
{
PermanentMemSpace *space = *i;
if (space->hierarchy >= hierarchy) {
// Include this if we're exporting (hierarchy=0) or if we're saving a state
// and will include this in the new state.
size_t size = (space->top-space->bottom)/4;
if (space->noOverwrite)
defaultNoOverSize += size;
else if (space->isMutable)
defaultMutSize += size;
else if (space->isCode)
defaultCodeSize += size;
else
defaultImmSize += size;
if (space->hierarchy == 0 && ! space->isMutable)
{
// We need a separate area for the tombstones because this is read-only
graveYard[tombs].graves = (PolyWord*)calloc(space->spaceSize(), sizeof(PolyWord));
if (graveYard[tombs].graves == 0)
{
if (debugOptions & DEBUG_SAVING)
Log("SAVE: Unable to allocate graveyard for permanent space, size: %lu.\n",
space->spaceSize() * sizeof(PolyWord));
throw MemoryException();
}
if (debugOptions & DEBUG_SAVING)
Log("SAVE: Allocated graveyard for permanent space, %p size: %lu.\n",
graveYard[tombs].graves, space->spaceSize() * sizeof(PolyWord));
graveYard[tombs].startAddr = space->bottom;
graveYard[tombs].endAddr = space->top;
tombs++;
}
}
}
for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
{
LocalMemSpace *space = *i;
uintptr_t size = space->allocatedSpace();
// It looks as though the mutable size generally gets
// overestimated while the immutable size is correct.
if (space->isMutable)
{
MutSizes sizeMut;
sizeMut.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr);
sizeMut.ScanAddressesInRegion(space->upperAllocPtr, space->top);
defaultNoOverSize += sizeMut.noOverSize / 4;
defaultMutSize += sizeMut.mutSize / 4;
}
else
defaultImmSize += size/2;
}
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
{
CodeSpace *space = *i;
uintptr_t size = space->spaceSize();
defaultCodeSize += size/2;
}
if (isExport)
{
// Minimum 1M words.
if (defaultMutSize < 1024*1024) defaultMutSize = 1024*1024;
if (defaultImmSize < 1024*1024) defaultImmSize = 1024*1024;
if (defaultCodeSize < 1024*1024) defaultCodeSize = 1024*1024;
#ifdef MACOSX
// Limit the segment size for Mac OS X. The linker has a limit of 2^24 relocations
// in a segment so this is a crude way of ensuring the limit isn't exceeded.
// It's unlikely to be exceeded by the code itself.
// Actually, from trial-and-error, the limit seems to be around 6M.
if (defaultMutSize > 6 * 1024 * 1024) defaultMutSize = 6 * 1024 * 1024;
if (defaultImmSize > 6 * 1024 * 1024) defaultImmSize = 6 * 1024 * 1024;
#endif
if (defaultNoOverSize < 4096) defaultNoOverSize = 4096; // Except for the no-overwrite area
}
else
{
// Much smaller minimum sizes for saved states.
if (defaultMutSize < 1024) defaultMutSize = 1024;
if (defaultImmSize < 4096) defaultImmSize = 4096;
if (defaultCodeSize < 4096) defaultCodeSize = 4096;
if (defaultNoOverSize < 4096) defaultNoOverSize = 4096;
// Set maximum sizes as well. We may have insufficient contiguous space for
// very large areas.
if (defaultMutSize > 1024 * 1024) defaultMutSize = 1024 * 1024;
if (defaultImmSize > 1024 * 1024) defaultImmSize = 1024 * 1024;
if (defaultCodeSize > 1024 * 1024) defaultCodeSize = 1024 * 1024;
if (defaultNoOverSize > 1024 * 1024) defaultNoOverSize = 1024 * 1024;
}
if (debugOptions & DEBUG_SAVING)
Log("SAVE: Copyscan default sizes: Immutable: %" POLYUFMT ", Mutable: %" POLYUFMT ", Code: %" POLYUFMT ", No-overwrite %" POLYUFMT ".\n",
defaultImmSize, defaultMutSize, defaultCodeSize, defaultNoOverSize);
}
// Delete the export spaces and the graveyard array, if one was allocated.
CopyScan::~CopyScan()
{
gMem.DeleteExportSpaces();
if (graveYard)
delete[](graveYard);
}
// This function is called for each address in an object
// once it has been copied to its new location. We copy first
// then scan to update the addresses.
POLYUNSIGNED CopyScan::ScanAddressAt(PolyWord *pt)
{
PolyWord val = *pt;
// Ignore integers.
if (IS_INT(val) || val == PolyWord::FromUnsigned(0))
return 0;
PolyObject *obj = val.AsObjPtr();
POLYUNSIGNED l = ScanAddress(&obj);
*pt = obj;
return l;
}
// This function is called for each address in an object
// once it has been copied to its new location. We copy first
// then scan to update the addresses.
POLYUNSIGNED CopyScan::ScanAddress(PolyObject **pt)
{
PolyObject *obj = *pt;
MemSpace *space = gMem.SpaceForObjectAddress(obj);
ASSERT(space != 0);
// We may sometimes get addresses that have already been updated
// to point to the new area. e.g. (only?) in the case of constants
// that have been updated in ScanConstantsWithinCode.
if (space->spaceType == ST_EXPORT)
return 0;
// If this is at a lower level than the hierarchy we are saving
// then leave it untouched.
if (space->spaceType == ST_PERMANENT)
{
PermanentMemSpace *pmSpace = (PermanentMemSpace*)space;
if (pmSpace->hierarchy < hierarchy)
return 0;
}
// Have we already scanned this?
if (obj->ContainsForwardingPtr())
{
// Update the address to the new value.
#ifdef POLYML32IN64
PolyObject *newAddr;
if (space->isCode)
newAddr = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
else newAddr = obj->GetForwardingPtr();
#else
PolyObject *newAddr = obj->GetForwardingPtr();
#endif
*pt = newAddr;
return 0; // No need to scan it again.
}
else if (space->spaceType == ST_PERMANENT)
{
// See if we have this in the grave-yard.
for (unsigned i = 0; i < tombs; i++)
{
GraveYard *g = &graveYard[i];
if ((PolyWord*)obj >= g->startAddr && (PolyWord*)obj < g->endAddr)
{
PolyWord *tombAddr = g->graves + ((PolyWord*)obj - g->startAddr);
PolyObject *tombObject = (PolyObject*)tombAddr;
if (tombObject->ContainsForwardingPtr())
{
#ifdef POLYML32IN64
PolyObject *newAddr;
if (space->isCode)
newAddr = (PolyObject*)(globalCodeBase + ((tombObject->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
else newAddr = tombObject->GetForwardingPtr();
#else
PolyObject *newAddr = tombObject->GetForwardingPtr();
#endif
*pt = newAddr;
return 0;
}
break; // No need to look further
}
}
}
// No, we need to copy it.
ASSERT(space->spaceType == ST_LOCAL || space->spaceType == ST_PERMANENT ||
space->spaceType == ST_CODE);
POLYUNSIGNED lengthWord = obj->LengthWord();
+ POLYUNSIGNED originalLengthWord = lengthWord;
POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord);
- PolyObject *newObj = 0;
- PolyObject* writAble = 0;
- bool isMutableObj = obj->IsMutable();
- bool isNoOverwrite = false;
- bool isByteObj = obj->IsByteObject();
- bool isCodeObj = false;
- if (isMutableObj)
- isNoOverwrite = obj->IsNoOverwriteObject();
- else isCodeObj = obj->IsCodeObject();
- // Allocate a new address for the object.
- for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++)
+ enum _newAddrType naType;
+ if (obj->IsMutable())
{
- PermanentMemSpace *space = *i;
- if (isMutableObj == space->isMutable &&
- isNoOverwrite == space->noOverwrite &&
- isByteObj == space->byteOnly &&
- isCodeObj == space->isCode)
- {
- ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom);
- size_t spaceLeft = space->top - space->topPointer;
- if (spaceLeft > words)
- {
- newObj = (PolyObject*)(space->topPointer + 1);
- writAble = space->writeAble(newObj);
- space->topPointer += words + 1;
-#ifdef POLYML32IN64
- // Maintain the odd-word alignment of topPointer
- if ((words & 1) == 0 && space->topPointer < space->top)
- {
- *space->writeAble(space->topPointer) = PolyWord::FromUnsigned(0);
- space->topPointer++;
- }
-#endif
- break;
- }
- }
- }
- if (newObj == 0)
+ if (obj->IsNoOverwriteObject()) naType = NANoOverwriteMutable; else naType = NAMutable;
+ }
+ else if (obj->IsCodeObject()) naType = NACode;
+ else if (obj->IsByteObject()) naType = NAByte;
+ else naType = NAWord;
+ PolyObject* newObj;
+#if(defined(HOSTARCHITECTURE_X86_64) && ! defined(POLYML32IN64))
+ // Split the constant area off into a separate object. This allows us to create a
+ // position-independent executable.
+ if (obj->IsCodeObject() && hierarchy == 0)
{
- // Didn't find room in the existing spaces. Create a new space.
- uintptr_t spaceWords;
- if (isMutableObj)
- {
- if (isNoOverwrite) spaceWords = defaultNoOverSize;
- else spaceWords = defaultMutSize;
- }
- else
- {
- if (isCodeObj) spaceWords = defaultCodeSize;
- else spaceWords = defaultImmSize;
- }
- if (spaceWords <= words)
- spaceWords = words + 1; // Make sure there's space for this object.
- PermanentMemSpace *space = gMem.NewExportSpace(spaceWords, isMutableObj, isNoOverwrite, isCodeObj);
- if (isByteObj) space->byteOnly = true;
- if (space == 0)
- {
- if (debugOptions & DEBUG_SAVING)
- Log("SAVE: Unable to allocate export space, size: %lu.\n", spaceWords);
- // Unable to allocate this.
- throw MemoryException();
- }
- newObj = (PolyObject*)(space->topPointer + 1);
- writAble = space->writeAble(newObj);
- space->topPointer += words + 1;
-#ifdef POLYML32IN64
- // Maintain the odd-word alignment of topPointer
- if ((words & 1) == 0 && space->topPointer < space->top)
- {
- *space->writeAble(space->topPointer) = PolyWord::FromUnsigned(0);
- space->topPointer++;
- }
-#endif
- ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom);
+ PolyWord* constPtr;
+ POLYUNSIGNED numConsts;
+ machineDependent->GetConstSegmentForCode(obj, constPtr, numConsts);
+ // Newly generated code will have the constants included with the code
+ // but if this is in the executable the constants will have been extracted before.
+ bool constsWereIncluded = constPtr > (PolyWord*)obj && constPtr < ((PolyWord*)obj) + words;
+ POLYUNSIGNED codeAreaSize = words;
+ if (constsWereIncluded)
+ codeAreaSize -= numConsts + 1;
+ newObj = newAddressForObject(codeAreaSize, NACode);
+ PolyObject* writable = gMem.SpaceForObjectAddress(newObj)->writeAble(newObj);
+ writable->SetLengthWord(codeAreaSize, F_CODE_OBJ); // set length word
+ lengthWord = newObj->LengthWord(); // Get the actual length word used
+ memcpy(writable, obj, codeAreaSize * sizeof(PolyWord));
+ PolyObject* newConsts = newAddressForObject(numConsts, NACodeConst);
+ // The constants live in an isCode export space, so writes must go through the
+ // writable mapping, exactly as for the code object itself above.
+ PolyObject* writableConsts = gMem.SpaceForObjectAddress(newConsts)->writeAble(newConsts);
+ writableConsts->SetLengthWord(numConsts);
+ memcpy(writableConsts, constPtr, numConsts * sizeof(PolyWord));
+ // Set the last word of the new area to the offset of the constants from the end of
+ // the code.
+ int64_t offset = (byte*)newConsts - (byte*)newObj - codeAreaSize * sizeof(PolyWord);
+ ASSERT(offset >= -(int64_t)0x80000000 && offset <= (int64_t)0x7fffffff);
+ ASSERT(offset < ((int64_t)1) << 32 && offset > ((int64_t)(-1)) << 32);
+ writable->Set(codeAreaSize - 1, PolyWord::FromSigned(offset));
}
-
- writAble->SetLengthWord(lengthWord); // copy length word
-
- if (hierarchy == 0 /* Exporting object module */ && isNoOverwrite && isMutableObj && !isByteObj)
+ else
+#endif
{
- // These are not exported. They are used for special values e.g. mutexes
- // that should be set to 0/nil/NONE at start-up.
- // Weak+No-overwrite byte objects are used for entry points and volatiles
- // in the foreign-function interface and have to be treated specially.
+ newObj = newAddressForObject(words, naType);
+ PolyObject* writAble = gMem.SpaceForObjectAddress(newObj)->writeAble(newObj);
+ writAble->SetLengthWord(lengthWord); // copy length word
- // Note: this must not be done when exporting a saved state because the
- // copied version is used as the local data for the rest of the session.
- for (POLYUNSIGNED i = 0; i < words; i++)
- writAble->Set(i, TAGGED(0));
+ if (hierarchy == 0 /* Exporting object module */ && obj->IsNoOverwriteObject() && ! obj->IsByteObject())
+ {
+ // These are not exported. They are used for special values e.g. mutexes
+ // that should be set to 0/nil/NONE at start-up.
+ // Weak+No-overwrite byte objects are used for entry points and volatiles
+ // in the foreign-function interface and have to be treated specially.
+
+ // Note: this must not be done when exporting a saved state because the
+ // copied version is used as the local data for the rest of the session.
+ for (POLYUNSIGNED i = 0; i < words; i++)
+ writAble->Set(i, TAGGED(0));
+ }
+ else memcpy(writAble, obj, words * sizeof(PolyWord));
}
- else memcpy(writAble, obj, words * sizeof(PolyWord));
if (space->spaceType == ST_PERMANENT && !space->isMutable && ((PermanentMemSpace*)space)->hierarchy == 0)
{
// The immutable permanent areas are read-only.
unsigned m;
for (m = 0; m < tombs; m++)
{
GraveYard *g = &graveYard[m];
if ((PolyWord*)obj >= g->startAddr && (PolyWord*)obj < g->endAddr)
{
PolyWord *tombAddr = g->graves + ((PolyWord*)obj - g->startAddr);
PolyObject *tombObject = (PolyObject*)tombAddr;
#ifdef POLYML32IN64
-if (isCodeObj)
+if (naType == NACode)
{
POLYUNSIGNED ll = (POLYUNSIGNED)(((PolyWord*)newObj - globalCodeBase) >> 1 | _OBJ_TOMBSTONE_BIT);
tombObject->SetLengthWord(ll);
}
else tombObject->SetForwardingPtr(newObj);
#else
tombObject->SetForwardingPtr(newObj);
#endif
break; // No need to look further
}
}
ASSERT(m < tombs); // Should be there.
}
- else if (isCodeObj)
+ else if (naType == NACode)
#ifdef POLYML32IN64
// If this is a code address we can't use the usual forwarding pointer format.
// Instead we have to compute the offset relative to the base of the code.
{
POLYUNSIGNED ll = (POLYUNSIGNED)(((PolyWord*)newObj-globalCodeBase) >> 1 | _OBJ_TOMBSTONE_BIT);
gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetLengthWord(ll);
}
#else
gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetForwardingPtr(newObj);
#endif
else obj->SetForwardingPtr(newObj); // Put forwarding pointer in old object.
- if (OBJ_IS_CODE_OBJECT(lengthWord))
+ if (naType == NACode)
{
- // We don't need to worry about flushing the instruction cache
- // since we're not going to execute this code here.
- // We do have to update any relative addresses within the code
+ // We should flush the instruction cache here since we will execute the code
+ // at this location if this is a saved state.
+ machineDependent->FlushInstructionCache(newObj, newObj->Length());
+ // We have to update any relative addresses within the code
// to take account of its new position. We have to do that now
// even though ScanAddressesInObject will do it again because this
// is the only point where we have both the old and the new addresses.
- machineDependent->ScanConstantsWithinCode(newObj, obj, words, this);
+ PolyWord *oldConstAddr;
+ POLYUNSIGNED count;
+ machineDependent->GetConstSegmentForCode(obj, OBJ_OBJECT_LENGTH(originalLengthWord), oldConstAddr, count);
+ PolyWord *newConstAddr = machineDependent->ConstPtrForCode(newObj);
+ machineDependent->ScanConstantsWithinCode(newObj, obj, words, newConstAddr, oldConstAddr, count, this);
}
*pt = newObj; // Update it to the newly copied object.
return lengthWord; // This new object needs to be scanned.
}
+PolyObject* CopyScan::newAddressForObject(POLYUNSIGNED words, enum _newAddrType naType)
+{
+ PolyObject* newObj = 0;
+ // Allocate a new address for the object.
+ for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++)
+ {
+ PermanentMemSpace* space = *i;
+ bool match = false;
+ switch (naType)
+ {
+ case NAWord: match = !space->isMutable && !space->byteOnly && !space->isCode; break;
+ case NAMutable: match = space->isMutable && !space->noOverwrite; break;
+ case NANoOverwriteMutable: match = space->isMutable && space->noOverwrite; break;
+ case NAByte: match = !space->isMutable && space->byteOnly; break;
+ case NACode: match = !space->isMutable && space->isCode && !space->constArea; break;
+ case NACodeConst: match = !space->isMutable && space->isCode && space->constArea; break;
+ }
+ if (match)
+ {
+ ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom);
+ size_t spaceLeft = space->top - space->topPointer;
+ if (spaceLeft > words)
+ {
+ newObj = (PolyObject*)(space->topPointer + 1);
+ space->topPointer += words + 1;
+#ifdef POLYML32IN64
+ // Maintain the odd-word alignment of topPointer
+ if ((words & 1) == 0 && space->topPointer < space->top)
+ {
+ *space->writeAble(space->topPointer) = PolyWord::FromUnsigned(0);
+ space->topPointer++;
+ }
+#endif
+ break;
+ }
+ }
+ }
+ if (newObj == 0)
+ {
+ // Didn't find room in the existing spaces. Create a new space.
+ uintptr_t spaceWords;
+ switch (naType)
+ {
+ case NAMutable: spaceWords = defaultMutSize; break;
+ case NANoOverwriteMutable: spaceWords = defaultNoOverSize; break;
+ case NACode: spaceWords = defaultCodeSize; break;
+ case NACodeConst: spaceWords = defaultCodeSize; break;
+ default: spaceWords = defaultImmSize;
+ }
+ if (spaceWords <= words)
+ spaceWords = words + 1; // Make sure there's space for this object.
+ PermanentMemSpace* space =
+ gMem.NewExportSpace(spaceWords, naType == NAMutable || naType == NANoOverwriteMutable, naType == NANoOverwriteMutable,
+ naType == NACode || naType == NACodeConst);
+ if (space == 0)
+ {
+ if (debugOptions & DEBUG_SAVING)
+ Log("SAVE: Unable to allocate export space, size: %lu.\n", spaceWords);
+ // Unable to allocate this.
+ throw MemoryException();
+ }
+ // Only dereference the space after the null check above.
+ if (naType == NAByte) space->byteOnly = true;
+ if (naType == NACodeConst) space->constArea = true;
+ newObj = (PolyObject*)(space->topPointer + 1);
+ space->topPointer += words + 1;
+#ifdef POLYML32IN64
+ // Maintain the odd-word alignment of topPointer
+ if ((words & 1) == 0 && space->topPointer < space->top)
+ {
+ *space->writeAble(space->topPointer) = PolyWord::FromUnsigned(0);
+ space->topPointer++;
+ }
+#endif
+ ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom);
+ }
+ return newObj;
+}
+
// The address of code in the code area. We treat this as a normal heap cell.
// We will probably need to copy this and to process addresses within it.
POLYUNSIGNED CopyScan::ScanCodeAddressAt(PolyObject **pt)
{
POLYUNSIGNED lengthWord = ScanAddress(pt);
if (lengthWord)
ScanAddressesInObject(*pt, lengthWord);
return 0;
}
PolyObject *CopyScan::ScanObjectAddress(PolyObject *base)
{
PolyWord val = base;
// Scan this as an address.
POLYUNSIGNED lengthWord = CopyScan::ScanAddressAt(&val);
if (lengthWord)
ScanAddressesInObject(val.AsObjPtr(), lengthWord);
return val.AsObjPtr();
}
#define MAX_EXTENSION 4 // The longest extension we may need to add is ".obj"
// Convert the forwarding pointers in a region back into length words.
// Generally if this object has a forwarding pointer that's
// because we've moved it into the export region. We can,
// though, get multiple levels of forwarding if there is an object
// that has been shifted up by a garbage collection, leaving a forwarding
// pointer and then that object has been moved to the export region.
// We mustn't turn locally forwarded values back into ordinary objects
// because they could contain addresses that are no longer valid.
static POLYUNSIGNED GetObjLength(PolyObject *obj)
{
if (obj->ContainsForwardingPtr())
{
PolyObject *forwardedTo;
#ifdef POLYML32IN64
{
MemSpace *space = gMem.SpaceForObjectAddress(obj);
if (space->isCode)
forwardedTo = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
else forwardedTo = obj->GetForwardingPtr();
}
#else
forwardedTo = obj->GetForwardingPtr();
#endif
POLYUNSIGNED length = GetObjLength(forwardedTo);
MemSpace *space = gMem.SpaceForObjectAddress(forwardedTo);
if (space->spaceType == ST_EXPORT)
+ {
+ // If this is a code object whose constant area has been split off we
+ // need to add the length of the constant area.
+ if (forwardedTo->IsCodeObject())
+ {
+ PolyWord* constPtr;
+ POLYUNSIGNED numConsts;
+ machineDependent->GetConstSegmentForCode(forwardedTo, constPtr, numConsts);
+ if (!(constPtr > (PolyWord*)forwardedTo && constPtr < ((PolyWord*)forwardedTo) + OBJ_OBJECT_LENGTH(length)))
+ length += numConsts + 1;
+ }
gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetLengthWord(length);
+ }
return length;
}
else {
ASSERT(obj->ContainsNormalLengthWord());
return obj->LengthWord();
}
}
static void FixForwarding(PolyWord *pt, size_t space)
{
while (space)
{
pt++;
PolyObject *obj = (PolyObject*)pt;
#ifdef POLYML32IN64
if ((uintptr_t)obj & 4)
{
// Skip filler words needed to align to an even word
space--;
continue; // We've added 1 to pt so just loop.
}
#endif
size_t length = OBJ_OBJECT_LENGTH(GetObjLength(obj));
pt += length;
ASSERT(space > length);
space -= length+1;
}
}
class ExportRequest: public MainThreadRequest
{
public:
ExportRequest(Handle root, Exporter *exp): MainThreadRequest(MTP_EXPORTING),
exportRoot(root), exporter(exp) {}
virtual void Perform() { exporter->RunExport(exportRoot->WordP()); }
Handle exportRoot;
Exporter *exporter;
};
static void exporter(TaskData *taskData, Handle fileName, Handle root, const TCHAR *extension, Exporter *exports)
{
size_t extLen = _tcslen(extension);
TempString fileNameBuff(Poly_string_to_T_alloc(fileName->Word(), extLen));
if (fileNameBuff == NULL)
raise_syscall(taskData, "Insufficient memory", NOMEMORY);
size_t length = _tcslen(fileNameBuff);
// Does it already have the extension? If not add it on.
if (length < extLen || _tcscmp(fileNameBuff + length - extLen, extension) != 0)
_tcscat(fileNameBuff, extension);
#if (defined(_WIN32) && defined(UNICODE))
exports->exportFile = _wfopen(fileNameBuff, L"wb");
#else
exports->exportFile = fopen(fileNameBuff, "wb");
#endif
if (exports->exportFile == NULL)
raise_syscall(taskData, "Cannot open export file", ERRORNUMBER);
// Request a full GC to reduce the size of fix-ups.
FullGC(taskData);
// Request the main thread to do the export.
ExportRequest request(root, exports);
processes->MakeRootRequest(taskData, &request);
if (exports->errorMessage)
raise_fail(taskData, exports->errorMessage);
}
// This is called by the initial thread to actually do the export.
void Exporter::RunExport(PolyObject *rootFunction)
{
Exporter *exports = this;
PolyObject *copiedRoot = 0;
CopyScan copyScan(hierarchy);
try {
copyScan.initialise();
// Copy the root and everything reachable from it into the temporary area.
copiedRoot = copyScan.ScanObjectAddress(rootFunction);
}
catch (MemoryException &)
{
// If we ran out of memory.
copiedRoot = 0;
}
// Fix the forwarding pointers.
for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
{
LocalMemSpace *space = *i;
// Local areas only have objects from the allocation pointer to the top.
FixForwarding(space->bottom, space->lowerAllocPtr - space->bottom);
FixForwarding(space->upperAllocPtr, space->top - space->upperAllocPtr);
}
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
{
MemSpace *space = *i;
// Permanent areas are filled with objects from the bottom.
FixForwarding(space->bottom, space->top - space->bottom);
}
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
{
MemSpace *space = *i;
// Code areas are filled with objects from the bottom.
FixForwarding(space->bottom, space->top - space->bottom);
}
// Reraise the exception after cleaning up the forwarding pointers.
if (copiedRoot == 0)
{
exports->errorMessage = "Insufficient Memory";
return;
}
// Copy the areas into the export object.
size_t tableEntries = gMem.eSpaces.size();
unsigned memEntry = 0;
if (hierarchy != 0) tableEntries += gMem.pSpaces.size();
exports->memTable = new memoryTableEntry[tableEntries];
// If we're constructing a module we need to include the global spaces.
if (hierarchy != 0)
{
// Permanent spaces from the executable.
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
{
PermanentMemSpace *space = *i;
if (space->hierarchy < hierarchy)
{
memoryTableEntry *entry = &exports->memTable[memEntry++];
entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
entry->mtIndex = space->index;
entry->mtFlags = 0;
if (space->isMutable) entry->mtFlags |= MTF_WRITEABLE;
if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE;
}
}
newAreas = memEntry;
}
for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++)
{
memoryTableEntry *entry = &exports->memTable[memEntry++];
PermanentMemSpace *space = *i;
entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
entry->mtIndex = hierarchy == 0 ? memEntry-1 : space->index;
entry->mtFlags = 0;
if (space->isMutable)
{
entry->mtFlags = MTF_WRITEABLE;
if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE;
}
- if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE;
+ if (space->isCode && !space->constArea) entry->mtFlags |= MTF_EXECUTABLE;
if (space->byteOnly) entry->mtFlags |= MTF_BYTES;
}
ASSERT(memEntry == tableEntries);
exports->memTableEntries = memEntry;
exports->rootFunction = copiedRoot;
try {
// This can raise MemoryException at least in PExport::exportStore.
exports->exportStore();
}
catch (MemoryException &) {
exports->errorMessage = "Insufficient Memory";
}
}
// Functions called via the RTS call.
Handle exportNative(TaskData *taskData, Handle args)
{
#ifdef HAVE_PECOFF
// Windows including Cygwin
#if (defined(_WIN32))
const TCHAR *extension = _T(".obj"); // Windows
#else
const char *extension = ".o"; // Cygwin
#endif
PECOFFExport exports;
exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)),
taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports);
#elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H)
// Most Unix including Linux, FreeBSD and Solaris.
const char *extension = ".o";
ELFExport exports;
exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)),
taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports);
#elif defined(HAVE_MACH_O_RELOC_H)
// Mac OS-X
const char *extension = ".o";
MachoExport exports;
exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)),
taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports);
#else
raise_exception_string (taskData, EXC_Fail, "Native export not available for this platform");
#endif
return taskData->saveVec.push(TAGGED(0));
}
Handle exportPortable(TaskData *taskData, Handle args)
{
PExport exports;
exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)),
taskData->saveVec.push(args->WordP()->Get(1)), _T(".txt"), &exports);
return taskData->saveVec.push(TAGGED(0));
}
POLYUNSIGNED PolyExport(FirstArgument threadId, PolyWord fileName, PolyWord root)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle pushedName = taskData->saveVec.push(fileName);
Handle pushedRoot = taskData->saveVec.push(root);
try {
#ifdef HAVE_PECOFF
// Windows including Cygwin
#if (defined(_WIN32))
const TCHAR *extension = _T(".obj"); // Windows
#else
const char *extension = ".o"; // Cygwin
#endif
PECOFFExport exports;
exporter(taskData, pushedName, pushedRoot, extension, &exports);
#elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H)
// Most Unix including Linux, FreeBSD and Solaris.
const char *extension = ".o";
ELFExport exports;
exporter(taskData, pushedName, pushedRoot, extension, &exports);
#elif defined(HAVE_MACH_O_RELOC_H)
// Mac OS-X
const char *extension = ".o";
MachoExport exports;
exporter(taskData, pushedName, pushedRoot, extension, &exports);
#else
raise_exception_string (taskData, EXC_Fail, "Native export not available for this platform");
#endif
} catch (...) { } // If an ML exception is raised
taskData->saveVec.reset(reset);
taskData->PostRTSCall();
return TAGGED(0).AsUnsigned(); // Returns unit
}
POLYUNSIGNED PolyExportPortable(FirstArgument threadId, PolyWord fileName, PolyWord root)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle pushedName = taskData->saveVec.push(fileName);
Handle pushedRoot = taskData->saveVec.push(root);
try {
PExport exports;
exporter(taskData, pushedName, pushedRoot, _T(".txt"), &exports);
} catch (...) { } // If an ML exception is raised
taskData->saveVec.reset(reset);
taskData->PostRTSCall();
return TAGGED(0).AsUnsigned(); // Returns unit
}
// Helper functions for exporting. We need to produce relocation information
// and this code is common to every method.
Exporter::Exporter(unsigned int h): exportFile(NULL), errorMessage(0), hierarchy(h), memTable(0), newAreas(0)
{
}
Exporter::~Exporter()
{
delete[](memTable);
if (exportFile)
fclose(exportFile);
}
void Exporter::relocateValue(PolyWord *pt)
{
#ifndef POLYML32IN64
PolyWord q = *pt;
if (IS_INT(q) || q == PolyWord::FromUnsigned(0)) {}
else createRelocation(pt);
#endif
}
void Exporter::createRelocation(PolyWord* pt)
{
*gMem.SpaceForAddress(pt)->writeAble(pt) = createRelocation(*pt, pt);
}
// Check through the areas to see where the address is. It must be
// in one of them.
unsigned Exporter::findArea(void *p)
{
for (unsigned i = 0; i < memTableEntries; i++)
{
if (p > memTable[i].mtOriginalAddr &&
p <= (char*)memTable[i].mtOriginalAddr + memTable[i].mtLength)
return i;
}
{ ASSERT(0); }
return 0;
}
void Exporter::relocateObject(PolyObject *p)
{
if (p->IsByteObject())
{
if (p->IsMutable() && p->IsWeakRefObject())
{
// Weak mutable byte refs are used for external references and
// also in the FFI for non-persistent values.
bool isFuncPtr = true;
const char *entryName = getEntryPointName(p, &isFuncPtr);
if (entryName != 0) addExternalReference(p, entryName, isFuncPtr);
// Clear the first word of the data.
ASSERT(p->Length() >= sizeof(uintptr_t)/sizeof(PolyWord));
*(uintptr_t*)p = 0;
}
}
else if (p->IsCodeObject())
{
POLYUNSIGNED constCount;
PolyWord *cp;
ASSERT(! p->IsMutable() );
- p->GetConstSegmentForCode(cp, constCount);
+ machineDependent->GetConstSegmentForCode(p, cp, constCount);
/* Now the constants. */
for (POLYUNSIGNED i = 0; i < constCount; i++) relocateValue(&(cp[i]));
-
}
else // Closure and ordinary objects
{
POLYUNSIGNED length = p->Length();
for (POLYUNSIGNED i = 0; i < length; i++) relocateValue(p->Offset(i));
}
}
ExportStringTable::ExportStringTable(): strings(0), stringSize(0), stringAvailable(0)
{
}
ExportStringTable::~ExportStringTable()
{
free(strings);
}
// Add a string to the string table, growing it if necessary.
unsigned long ExportStringTable::makeEntry(const char *str)
{
unsigned len = (unsigned)strlen(str);
unsigned long entry = stringSize;
if (stringSize + len + 1 > stringAvailable)
{
stringAvailable = stringAvailable+stringAvailable/2;
if (stringAvailable < stringSize + len + 1)
stringAvailable = stringSize + len + 1 + 500;
char* newStrings = (char*)realloc(strings, stringAvailable);
if (newStrings == 0)
{
if (debugOptions & DEBUG_SAVING)
Log("SAVE: Unable to realloc string table, size: %lu.\n", stringAvailable);
throw MemoryException();
}
else strings = newStrings;
}
strcpy(strings + stringSize, str);
stringSize += len + 1;
return entry;
}
struct _entrypts exporterEPT[] =
{
{ "PolyExport", (polyRTSFunction)&PolyExport},
{ "PolyExportPortable", (polyRTSFunction)&PolyExportPortable},
{ NULL, NULL} // End of list.
};
diff --git a/libpolyml/exporter.h b/libpolyml/exporter.h
index 02a3b6fb..59d28500 100644
--- a/libpolyml/exporter.h
+++ b/libpolyml/exporter.h
@@ -1,120 +1,131 @@
/*
Title: exporter.h - Export a function as an object or C file
- Copyright (c) 2006, 2015-17, 2020 David C.J. Matthews
+ Copyright (c) 2006, 2015-17, 2020-21 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef EXPORTER_H_INCLUDED
#define EXPORTER_H_INCLUDED
#include "globals.h" // For PolyWord
#include "../polyexports.h" // For struct _memTableEntry
#ifdef HAVE_STDIO_H
#include // For FILE
#endif
class SaveVecEntry;
typedef SaveVecEntry *Handle;
class TaskData;
extern Handle exportNative(TaskData *mdTaskData, Handle args);
extern Handle exportPortable(TaskData *mdTaskData, Handle args);
// This is the base class for the exporters for the various object-code formats.
class Exporter
{
public:
Exporter(unsigned int h=0);
virtual ~Exporter();
virtual void exportStore(void) = 0;
// Called by the root thread to do the work.
void RunExport(PolyObject *rootFunction);
protected:
virtual PolyWord createRelocation(PolyWord p, void *relocAddr) = 0;
void relocateValue(PolyWord *pt);
void relocateObject(PolyObject *p);
void createRelocation(PolyWord *pt);
unsigned findArea(void *p); // Find index of area that address is in.
virtual void addExternalReference(void *p, const char *entryPoint, bool isFuncPtr) {}
public:
FILE *exportFile;
const char *errorMessage;
protected:
unsigned int hierarchy;
struct _memTableEntry *memTable;
unsigned memTableEntries;
PolyObject *rootFunction; // Address of the root function.
unsigned newAreas;
};
// The object-code exporters all use a similar string table format
// consisting of null-terminated C-strings.
class ExportStringTable
{
public:
ExportStringTable();
~ExportStringTable();
unsigned long makeEntry(const char *str);
char *strings;
unsigned long stringSize, stringAvailable;
};
#include "scanaddrs.h"
// Because permanent immutable areas are read-only we need to
// have somewhere else to hold the tomb-stones.
class GraveYard {
public:
GraveYard() { graves = 0; }
~GraveYard();
PolyWord *graves;
PolyWord *startAddr, *endAddr;
};
class CopyScan: public ScanAddress
{
public:
CopyScan(unsigned h=0);
void initialise(bool isExport=true);
~CopyScan();
protected:
virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
// Have to follow pointers from closures into code.
virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt);
POLYUNSIGNED ScanAddress(PolyObject **pt);
+private:
+ enum _newAddrType {
+ NAWord,
+ NAMutable,
+ NANoOverwriteMutable,
+ NAByte,
+ NACode,
+ NACodeConst
+ };
+
+ PolyObject* newAddressForObject(POLYUNSIGNED words, enum _newAddrType naType);
public:
virtual PolyObject *ScanObjectAddress(PolyObject *base);
// Default sizes of the segments.
uintptr_t defaultImmSize, defaultCodeSize, defaultMutSize, defaultNoOverSize;
unsigned hierarchy;
GraveYard *graveYard;
unsigned tombs;
};
extern struct _entrypts exporterEPT[];
#endif
diff --git a/libpolyml/gc_mark_phase.cpp b/libpolyml/gc_mark_phase.cpp
index b14ccc1a..66e1e97c 100644
--- a/libpolyml/gc_mark_phase.cpp
+++ b/libpolyml/gc_mark_phase.cpp
@@ -1,888 +1,888 @@
/*
Title: Multi-Threaded Garbage Collector - Mark phase
Copyright (c) 2010-12, 2015-16, 2019 David C. J. Matthews
Based on the original garbage collector code
Copyright 2000-2008
Cambridge University Technical Services Limited
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
This is the first, mark, phase of the garbage collector. It detects all
reachable cells in the area being collected. At the end of the phase the
bit-maps associated with the areas will have ones for words belonging to cells
that must be retained and zeros for words that can be reused.
This is now multi-threaded. The mark phase involves setting a bit in the header
of each live cell and then a pass over the memory building the bitmaps and clearing
this bit. It is unfortunate that we cannot use the GC-bit that is used in
forwarding pointers but we may well have forwarded pointers left over from a
partially completed minor GC. Using a bit in the header avoids the need for
locking since at worst it may involve two threads duplicating some marking.
The code ensures that each reachable cell is marked at least once but with
multiple threads a cell may be marked more than once if the
memory is not fully up to date. Each thread has a stack on which it
remembers cells that have been marked but not fully scanned. If a
thread runs out of cells of its own to scan it can pick a pointer off
the stack of another thread and scan that. The original thread will
still scan it some time later but it should find that the addresses
in it have all been marked and it can simply pop this off. This is
all done without locking. Stacks are only modified by the owning
thread and when they pop anything they write zero in its place.
Other threads only need to search for a zero to find if they are
at the top and if they get a pointer that has already been scanned
then this is safe. The only assumption made about the memory is
that all the bits of a word are updated together so that a thread
will always read a value that is a valid pointer.
Many of the ideas are drawn from Flood, Detlefs, Shavit and Zhang 2001
"Parallel Garbage Collection for Shared Memory Multiprocessors".
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#include "globals.h"
#include "processes.h"
#include "gc.h"
#include "scanaddrs.h"
#include "check_objects.h"
#include "bitmap.h"
#include "memmgr.h"
#include "diagnostics.h"
#include "gctaskfarm.h"
#include "profiling.h"
#include "heapsizing.h"
#define MARK_STACK_SIZE 3000
#define LARGECACHE_SIZE 20
// One marking engine per GC thread. Each instance owns a private mark
// stack; cells are marked by setting _OBJ_GC_MARK in their length word.
class MTGCProcessMarkPointers: public ScanAddress
{
public:
MTGCProcessMarkPointers();
virtual void ScanRuntimeAddress(PolyObject **pt, RtsStrength weak);
virtual PolyObject *ScanObjectAddress(PolyObject *base);
virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord);
// Have to redefine this for some reason.
void ScanAddressesInObject(PolyObject *base)
{ ScanAddressesInObject(base, base->LengthWord()); }
- virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code);
+ virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement);
// ScanCodeAddressAt should never be called.
POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { ASSERT(0); return 0; }
static void MarkPointersTask(GCTaskId *, void *arg1, void *arg2);
// Allocate one marking engine (mark stack) per GC thread.
static void InitStatics(unsigned threads)
{
markStacks = new MTGCProcessMarkPointers[threads];
nInUse = 0;
nThreads = threads;
}
static void MarkRoots(void);
static bool RescanForStackOverflow();
private:
bool TestForScan(PolyWord *pt);
void MarkAndTestForScan(PolyWord *pt);
void Reset();
// Push a partially-scanned cell. "currentPtr", when non-zero, is the
// resume position within a large cell and is remembered in the cache.
void PushToStack(PolyObject *obj, PolyWord *currentPtr = 0)
{
// If we don't have all the threads running we start a new one but
// only once we have several items on the stack. Otherwise we
// can end up creating a task that terminates almost immediately.
if (nInUse >= nThreads || msp < 2 || ! ForkNew(obj))
{
if (msp < MARK_STACK_SIZE)
{
markStack[msp++] = obj;
if (currentPtr != 0)
{
locPtr++;
if (locPtr == LARGECACHE_SIZE) locPtr = 0;
largeObjectCache[locPtr].base = obj;
largeObjectCache[locPtr].current = currentPtr;
}
}
else StackOverflow(obj);
}
// else the new task is processing it.
}
static void StackOverflow(PolyObject *obj);
static bool ForkNew(PolyObject *obj);
// The per-thread mark stack; entries above the top are zeroed so other
// threads can find the top without locking.
PolyObject *markStack[MARK_STACK_SIZE];
unsigned msp;           // Mark stack pointer: index of the first free slot.
bool active;            // True while a task is using this engine.
// For the typical small cell it's easier just to rescan from the start
// but that can be expensive for large cells. This caches the offset for
// large cells.
static const POLYUNSIGNED largeObjectSize = 50;
struct { PolyObject *base; PolyWord *current; } largeObjectCache[LARGECACHE_SIZE];
unsigned locPtr;        // Index of the most recent cache entry.
static MTGCProcessMarkPointers *markStacks;
protected:
static unsigned nThreads, nInUse;
static PLock stackLock; // Guards "active" and nInUse.
};
// There is one mark-stack for each GC thread. markStacks[0] is used by the
// main thread when marking the roots and rescanning after mark-stack overflow.
// Once that work is done markStacks[0] is released and is available for a
// worker thread.
MTGCProcessMarkPointers *MTGCProcessMarkPointers::markStacks;
unsigned MTGCProcessMarkPointers::nThreads, MTGCProcessMarkPointers::nInUse;
// Guards "active" flags and the nInUse count.
PLock MTGCProcessMarkPointers::stackLock("GC mark stack");
// It is possible to have two levels of forwarding because
// we could have a cell in the allocation area that has been moved
// to the immutable area and then shared with another cell.
// Chase a chain of forwarding pointers to the final object. Up to two
// levels can occur: a cell moved out of the allocation area and then
// shared with another cell.
inline PolyObject *FollowForwarding(PolyObject *obj)
{
    PolyObject *target = obj;
    for (; target->ContainsForwardingPtr(); target = target->GetForwardingPtr())
        ;
    return target;
}
// Construct an idle marking engine: empty mark stack, inactive, and a
// fully-zeroed large-object cache.
MTGCProcessMarkPointers::MTGCProcessMarkPointers(): msp(0), active(false), locPtr(0)
{
    // Zero every mark-stack slot: other threads search for a zero to
    // find the top of the stack.
    for (unsigned stackIdx = 0; stackIdx < MARK_STACK_SIZE; stackIdx++)
        markStack[stackIdx] = 0;
    // Zero the large-object cache as well, just to be sure.
    for (unsigned cacheIdx = 0; cacheIdx < LARGECACHE_SIZE; cacheIdx++)
    {
        largeObjectCache[cacheIdx].base = 0;
        largeObjectCache[cacheIdx].current = 0;
    }
}
// Clear the state at the beginning of a new GC pass.
// Reset this engine at the start of a new GC pass: rewind the cache
// index and wipe every large-object cache entry so nothing stale
// survives from the previous pass.
void MTGCProcessMarkPointers::Reset()
{
    locPtr = 0;
    for (unsigned idx = 0; idx < LARGECACHE_SIZE; idx++)
    {
        largeObjectCache[idx].current = 0;
        largeObjectCache[idx].base = 0;
    }
}
// Called when the stack has overflowed. We need to include this
// in the range to be rescanned.
// Called when a cell could not be pushed because the mark stack was full.
// Widen the containing space's rescan range to cover the whole cell so
// RescanForStackOverflow will pick it up later.
void MTGCProcessMarkPointers::StackOverflow(PolyObject *obj)
{
    MarkableSpace *space = (MarkableSpace*)gMem.SpaceForObjectAddress(obj);
    ASSERT(space != 0 && (space->spaceType == ST_LOCAL || space->spaceType == ST_CODE));
    PLocker lock(&space->spaceLock);
    // The range must include the length word as well as the body.
    PolyWord *cellStart = ((PolyWord*)obj) - 1;
    if (cellStart < space->fullGCRescanStart)
        space->fullGCRescanStart = cellStart;
    PolyWord *cellEnd = ((PolyWord*)obj) + obj->Length();
    if (cellEnd > space->fullGCRescanEnd)
        space->fullGCRescanEnd = cellEnd;
    ASSERT(obj->LengthWord() & _OBJ_GC_MARK); // Should have been marked.
    if (debugOptions & DEBUG_GC_ENHANCED)
        Log("GC: Mark: Stack overflow. Rescan for %p\n", obj);
}
// Fork a new task. Because we've checked nInUse without taking the lock
// we may find that we can no longer create a new task.
// Try to hand "obj" to a fresh worker task. The caller checked nInUse
// without the lock, so we may discover here that every engine is busy
// and have to return false.
bool MTGCProcessMarkPointers::ForkNew(PolyObject *obj)
{
    MTGCProcessMarkPointers *freeEngine = 0;
    {
        PLocker lock(&stackLock);
        if (nInUse == nThreads)
            return false; // Everything is already running.
        unsigned i = 0;
        while (i < nThreads && markStacks[i].active)
            i++;
        if (i < nThreads)
            freeEngine = &markStacks[i];
        ASSERT(freeEngine != 0);
        // Claim it before releasing the lock.
        freeEngine->active = true;
        nInUse++;
    }
    bool added = gpTaskFarm->AddWork(&MTGCProcessMarkPointers::MarkPointersTask, freeEngine, obj);
    ASSERT(added);
    return true;
}
// Main marking task. This is forked off initially to scan a specific object and
// anything reachable from it but once that has finished it tries to find objects
// on other stacks to scan.
// Worker-task entry point. arg1 is the engine (mark stack) claimed by
// ForkNew; arg2 is the root object this task was forked to scan. After
// the root is done the task steals work from other engines' stacks
// until every stack is empty, then releases the engine.
void MTGCProcessMarkPointers::MarkPointersTask(GCTaskId *, void *arg1, void *arg2)
{
MTGCProcessMarkPointers *marker = (MTGCProcessMarkPointers*)arg1;
marker->Reset();
marker->ScanAddressesInObject((PolyObject*)arg2);
while (true)
{
// Look for a stack that has at least one item on it.
MTGCProcessMarkPointers *steal = 0;
for (unsigned i = 0; i < nThreads && steal == 0; i++)
{
if (markStacks[i].markStack[0] != 0)
steal = &markStacks[i];
}
// We're finished if they're all done.
if (steal == 0)
break;
// Look for items on this stack
for (unsigned j = 0; j < MARK_STACK_SIZE; j++)
{
// Pick the item off the stack.
// N.B. The owning thread may update this to zero
// at any time.
PolyObject *toSteal = steal->markStack[j];
if (toSteal == 0) break; // Nothing more on the stack
// The idea here is that the original thread pushed this
// because there were at least two addresses it needed to
// process. It started down one branch but left the other.
// Since it will have marked cells in the branch it has
// followed this thread will start on the unprocessed
// address(es).
marker->ScanAddressesInObject(toSteal);
}
}
// Release the engine so ForkNew can reuse it.
PLocker lock(&stackLock);
marker->active = false; // It's finished
nInUse--;
ASSERT(marker->markStack[0] == 0);
}
// Tests if this needs to be scanned. It marks it if it has not been marked
// unless it has to be scanned.
// Returns true if the cell at *pt needs to be marked AND scanned for
// contained addresses. Tagged values, permanent-area cells and cells
// already marked return false. Byte cells are marked here (they contain
// no addresses) and also return false. May update *pt to the forwarded
// address of a cell moved by the minor GC.
bool MTGCProcessMarkPointers::TestForScan(PolyWord *pt)
{
if ((*pt).IsTagged())
return false;
// This could contain a forwarding pointer if it points into an
// allocation area and has been moved by the minor GC.
// We have to be a little careful. Another thread could also
// be following any forwarding pointers here. However it's safe
// because they will update it with the same value.
PolyObject *obj = (*pt).AsObjPtr();
if (obj->ContainsForwardingPtr())
{
obj = FollowForwarding(obj);
*pt = obj;
}
MemSpace *sp = gMem.SpaceForObjectAddress(obj);
if (sp == 0 || (sp->spaceType != ST_LOCAL && sp->spaceType != ST_CODE))
return false; // Ignore it if it points to a permanent area
POLYUNSIGNED L = obj->LengthWord();
if (L & _OBJ_GC_MARK)
return false; // Already marked
if (debugOptions & DEBUG_GC_DETAIL)
Log("GC: Mark: %p %" POLYUFMT " %u\n", obj, OBJ_OBJECT_LENGTH(L), GetTypeBits(L));
if (OBJ_IS_BYTE_OBJECT(L))
{
obj->SetLengthWord(L | _OBJ_GC_MARK); // Mark it
return false; // We've done as much as we need
}
return true;
}
// Mark the cell at *pt without scanning its contents. TestForScan marks
// byte cells itself; for anything it reports as needing a scan we just
// set the mark bit here.
void MTGCProcessMarkPointers::MarkAndTestForScan(PolyWord *pt)
{
    if (!TestForScan(pt))
        return;
    PolyObject *cell = (*pt).AsObjPtr();
    cell->SetLengthWord(cell->LengthWord() | _OBJ_GC_MARK);
}
// The initial entry to process the roots. These may be RTS addresses or addresses in
// a thread stack. Also called recursively to process the addresses of constants in
// code segments. This is used in situations where a scanner may return the
// updated address of an object.
// Mark the object at "obj" and arrange for its contents to be scanned.
// Returns the (possibly forwarded) address of the object. Permanent-area
// and already-marked cells are returned unchanged.
PolyObject *MTGCProcessMarkPointers::ScanObjectAddress(PolyObject *obj)
{
MemSpace *sp = gMem.SpaceForAddress((PolyWord*)obj-1);
if (!(sp->spaceType == ST_LOCAL || sp->spaceType == ST_CODE))
return obj; // Ignore it if it points to a permanent area
// We may have a forwarding pointer if this has been moved by the
// minor GC.
if (obj->ContainsForwardingPtr())
{
obj = FollowForwarding(obj);
sp = gMem.SpaceForAddress((PolyWord*)obj - 1);
}
ASSERT(obj->ContainsNormalLengthWord());
POLYUNSIGNED L = obj->LengthWord();
if (L & _OBJ_GC_MARK)
return obj; // Already marked
// Code spaces are read-only so the mark bit must be set through the
// writable mapping.
sp->writeAble(obj)->SetLengthWord(L | _OBJ_GC_MARK); // Mark it
if (profileMode == kProfileLiveData || (profileMode == kProfileLiveMutables && obj->IsMutable()))
AddObjectProfile(obj);
POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L);
if (debugOptions & DEBUG_GC_DETAIL)
Log("GC: Mark: %p %" POLYUFMT " %u\n", obj, n, GetTypeBits(L));
if (OBJ_IS_BYTE_OBJECT(L))
return obj;
// If we already have something on the stack we must be being called
// recursively to process a constant in a code segment. Just push
// it on the stack and let the caller deal with it.
if (msp != 0)
PushToStack(obj); // Can't check this because it may have forwarding ptrs.
else
{
// Normally a root but this can happen if we're following constants in code.
// In that case we want to make sure that we don't recurse too deeply and
// overflow the C stack. Push the address to the stack before calling
// ScanAddressesInObject so that if we come back here msp will be non-zero.
// ScanAddressesInObject will empty the stack.
PushToStack(obj);
MTGCProcessMarkPointers::ScanAddressesInObject(obj, L);
// We can only check after we've processed it because if we
// have addresses left over from an incomplete partial GC they
// may need to forwarded.
CheckObject (obj);
}
return obj;
}
// These functions are only called with pointers held by the runtime system.
// Weak references can occur in the runtime system, eg. streams and windows.
// Weak references are not marked and so unreferenced streams and windows
// can be detected and closed.
// Process a pointer held by the runtime system. Weak RTS references are
// deliberately not marked so that unreferenced streams and windows can
// be detected and closed.
void MTGCProcessMarkPointers::ScanRuntimeAddress(PolyObject **pt, RtsStrength weak)
{
    if (weak != STRENGTH_WEAK)
    {
        *pt = ScanObjectAddress(*pt);
        // Check only after any forwarding pointers have been followed.
        CheckPointer (*pt);
    }
}
// This is called via ScanAddressesInRegion to process the permanent mutables. It is
// also called from ScanObjectAddress to process root addresses.
// It processes all the addresses reachable from the object.
// This is almost the same as RecursiveScan::ScanAddressesInObject.
// Core marking loop: mark everything reachable from "obj" (whose length
// word is passed separately because the in-memory copy already carries
// the mark bit). Iterates rather than recurses: the first unmarked child
// becomes the next "obj", a second child is pushed, and cells with three
// or more children are re-pushed with a restart position.
void MTGCProcessMarkPointers::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord)
{
if (OBJ_IS_BYTE_OBJECT(lengthWord))
return;
while (true)
{
ASSERT (OBJ_IS_LENGTH(lengthWord));
POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord);
PolyWord *baseAddr = (PolyWord*)obj;
PolyWord *endWord = baseAddr + length;
if (OBJ_IS_WEAKREF_OBJECT(lengthWord))
{
// Special case.
ASSERT(OBJ_IS_MUTABLE_OBJECT(lengthWord)); // Should be a mutable.
ASSERT(OBJ_IS_WORD_OBJECT(lengthWord)); // Should be a plain object.
// We need to mark the "SOME" values in this object but we don't mark
// the references contained within the "SOME".
// Mark every word but ignore the result.
for (POLYUNSIGNED i = 0; i < length; i++)
(void)MarkAndTestForScan(baseAddr+i);
// We've finished with this.
endWord = baseAddr;
}
else if (OBJ_IS_CODE_OBJECT(lengthWord))
{
// Code addresses in the native code versions.
// Closure cells are normal (word) objects and code addresses are normal addresses.
// It's better to process the whole code object in one go.
ScanAddress::ScanAddressesInObject(obj, lengthWord);
endWord = baseAddr; // Finished
}
else if (OBJ_IS_CLOSURE_OBJECT(lengthWord))
{
// Closure cells in 32-in-64.
// The first word is the absolute address of the code ...
PolyObject *codeAddr = *(PolyObject**)obj;
// except that it is possible we haven't yet set it.
if (((uintptr_t)codeAddr & 1) == 0)
ScanObjectAddress(codeAddr);
// The rest is a normal tuple.
baseAddr += sizeof(PolyObject*) / sizeof(PolyWord);
}
// If there are only two addresses in this cell that need to be
// followed we follow them immediately and treat this cell as done.
// If there are more than two we push the address of this cell on
// the stack, follow the first address and then rescan it. That way
// list cells are processed once only but we don't overflow the
// stack by pushing all the addresses in a very large vector.
PolyObject *firstWord = 0;
PolyObject *secondWord = 0;
PolyWord *restartAddr = 0;
// If this cell was pushed with a resume position, continue from there
// instead of rescanning from the start.
if (obj == largeObjectCache[locPtr].base)
{
baseAddr = largeObjectCache[locPtr].current;
ASSERT(baseAddr > (PolyWord*)obj && baseAddr < endWord);
if (locPtr == 0) locPtr = LARGECACHE_SIZE - 1; else locPtr--;
}
while (baseAddr != endWord)
{
PolyWord wordAt = *baseAddr;
if (wordAt.IsDataPtr() && wordAt != PolyWord::FromUnsigned(0))
{
// Normal address. We can have words of all zeros at least in the
// situation where we have a partially constructed code segment where
// the constants at the end of the code have not yet been filled in.
if (TestForScan(baseAddr))
{
if (firstWord == 0)
firstWord = baseAddr->AsObjPtr();
else if (secondWord == 0)
{
// If we need to rescan because there are three or more words to do
// this is the place we need to restart (or the start of the cell if it's
// small).
restartAddr = baseAddr;
secondWord = baseAddr->AsObjPtr();
}
else break; // More than two words.
}
}
baseAddr++;
}
if (baseAddr != endWord)
// Put this back on the stack while we process the first word
PushToStack(obj, length < largeObjectSize ? 0 : restartAddr);
else if (secondWord != 0)
{
// Mark it now because we will process it.
PolyObject* writeAble = secondWord;
if (secondWord->IsCodeObject())
writeAble = gMem.SpaceForObjectAddress(secondWord)->writeAble(secondWord);
writeAble->SetLengthWord(secondWord->LengthWord() | _OBJ_GC_MARK);
// Put this on the stack. If this is a list node we will be
// pushing the tail.
PushToStack(secondWord);
}
if (firstWord != 0)
{
// Mark it and process it immediately.
PolyObject* writeAble = firstWord;
if (firstWord->IsCodeObject())
writeAble = gMem.SpaceForObjectAddress(firstWord)->writeAble(firstWord);
writeAble->SetLengthWord(firstWord->LengthWord() | _OBJ_GC_MARK);
obj = firstWord;
}
else if (msp == 0)
{
markStack[msp] = 0; // Really finished
return;
}
else
{
// Clear the item above the top. This really is finished.
if (msp < MARK_STACK_SIZE) markStack[msp] = 0;
// Pop the item from the stack but don't overwrite it yet.
// This allows another thread to steal it if there really
// is nothing else to do. This is only really important
// for large objects.
obj = markStack[--msp]; // Pop something.
}
lengthWord = obj->LengthWord();
}
}
// Process a constant within the code. This is a direct copy of ScanAddress::ScanConstant
// with the addition of the locking.
-void MTGCProcessMarkPointers::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code)
+void MTGCProcessMarkPointers::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement)
{
// If we have newly compiled code the constants may be in the
// local heap. MTGCProcessMarkPointers::ScanObjectAddress can
// return an updated address for a local address if there is a
// forwarding pointer.
// Constants can be aligned on any byte offset so another thread
// scanning the same code could see an invalid address if it read
// the constant while it was being updated. We put a lock round
// this just in case.
MemSpace *space = gMem.SpaceForAddress(addressOfConstant);
PLock *lock = 0;
if (space->spaceType == ST_CODE)
lock = &((CodeSpace*)space)->spaceLock;
if (lock != 0)
lock->Lock();
// "displacement" is passed straight through to the decoder of the
// relocated constant.
- PolyObject *p = GetConstantValue(addressOfConstant, code);
+ PolyObject *p = GetConstantValue(addressOfConstant, code, displacement);
if (lock != 0)
lock->Unlock();
if (p != 0)
{
PolyObject *newVal = ScanObjectAddress(p);
if (newVal != p) // Update it if it has changed.
{
// Re-take the lock only for the write; ScanObjectAddress may
// itself scan code and must not run with this lock held.
if (lock != 0)
lock->Lock();
SetConstantValue(addressOfConstant, newVal, code);
if (lock != 0)
lock->Unlock();
}
}
}
// Mark all the roots. This is run in the main thread and has the effect
// of starting new tasks as the scanning runs.
void MTGCProcessMarkPointers::MarkRoots(void)
{
ASSERT(nThreads >= 1);
ASSERT(nInUse == 0);
MTGCProcessMarkPointers *marker = &markStacks[0];
marker->Reset();
marker->active = true;
nInUse = 1;
// Scan the permanent mutable areas.
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
{
PermanentMemSpace *space = *i;
if (space->isMutable && ! space->byteOnly)
marker->ScanAddressesInRegion(space->bottom, space->top);
}
// Scan the RTS roots.
GCModules(marker);
ASSERT(marker->markStack[0] == 0);
// When this has finished there may well be other tasks running.
PLocker lock(&stackLock);
marker->active = false;
nInUse--;
}
// This class just allows us to use ScanAddress::ScanAddressesInRegion to call
// ScanAddressesInObject for each object in the region.
// Adapter: drives ScanAddress::ScanAddressesInRegion over a space and
// forwards each already-marked object to the real marker, so cells whose
// contents were skipped on stack overflow get scanned.
class Rescanner: public ScanAddress
{
public:
Rescanner(MTGCProcessMarkPointers *marker): m_marker(marker) {}
virtual void ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord)
{
// If it has previously been marked it is known to be reachable but
// the contents may not have been scanned if the stack overflowed.
if (lengthWord &_OBJ_GC_MARK)
m_marker->ScanAddressesInObject(obj, lengthWord);
}
// Have to define this.  Neither should ever be called here.
virtual PolyObject *ScanObjectAddress(PolyObject *base) { ASSERT(false); return 0; }
virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { ASSERT(false); return 0; }
bool ScanSpace(MarkableSpace *space);
private:
MTGCProcessMarkPointers *m_marker; // The engine that does the real marking.
};
// Rescan any marked objects in the area between fullGCRescanStart and fullGCRescanEnd.
// N.B. We may have threads already processing other areas and they could overflow
// their stacks and change fullGCRescanStart or fullGCRescanEnd.
// Rescan any marked objects between this space's recorded overflow
// limits. Returns true if anything was rescanned (the caller must then
// repeat, since rescanning can overflow again).
bool Rescanner::ScanSpace(MarkableSpace *space)
{
    PolyWord *rescanFrom, *rescanTo;
    {
        // Snapshot and reset the range under the lock: other running
        // threads may record a fresh overflow while we scan.
        PLocker lock(&space->spaceLock);
        rescanFrom = space->fullGCRescanStart;
        rescanTo = space->fullGCRescanEnd;
        space->fullGCRescanStart = space->top;
        space->fullGCRescanEnd = space->bottom;
    }
    if (rescanFrom >= rescanTo)
        return false; // Nothing was recorded for this space.
    if (debugOptions & DEBUG_GC_ENHANCED)
        Log("GC: Mark: Rescanning from %p to %p\n", rescanFrom, rescanTo);
    ScanAddressesInRegion(rescanFrom, rescanTo);
    return true; // Require rescan
}
// When the threads created by marking the roots have completed we need to check that
// the mark stack has not overflowed. If it has we need to rescan. This rescanning
// pass may result in a further overflow so if we find we have to rescan we repeat.
bool MTGCProcessMarkPointers::RescanForStackOverflow()
{
ASSERT(nThreads >= 1);
ASSERT(nInUse == 0);
MTGCProcessMarkPointers *marker = &markStacks[0];
marker->Reset();
marker->active = true;
nInUse = 1;
bool rescan = false;
Rescanner rescanner(marker);
for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
{
if (rescanner.ScanSpace(*i))
rescan = true;
}
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
{
if (rescanner.ScanSpace(*i))
rescan = true;
}
{
PLocker lock(&stackLock);
nInUse--;
marker->active = false;
}
return rescan;
}
// Walk the objects between pt and top, clearing the _OBJ_GC_MARK bit set
// during marking and transferring it to the space's bitmap (length word
// included). Also accumulates the mutable/immutable live-word counts and
// updates the weak-ref and lower-limit bounds for the space.
static void SetBitmaps(LocalMemSpace *space, PolyWord *pt, PolyWord *top)
{
while (pt < top)
{
#ifdef POLYML32IN64
// In 32-in-64 objects are aligned on 8-byte boundaries; skip the
// padding word so pt lands on a length word.
if ((((uintptr_t)pt) & 4) == 0)
{
pt++;
continue;
}
#endif
// pt points at the length word; the object body starts one word on.
PolyObject *obj = (PolyObject*)++pt;
// If it has been copied by a minor collection skip it
if (obj->ContainsForwardingPtr())
{
obj = FollowForwarding(obj);
ASSERT(obj->ContainsNormalLengthWord());
pt += obj->Length();
}
else
{
POLYUNSIGNED L = obj->LengthWord();
POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L);
if (L & _OBJ_GC_MARK)
{
obj->SetLengthWord(L & ~(_OBJ_GC_MARK));
uintptr_t bitno = space->wordNo(pt);
// n+1 bits starting one before the object: the length word
// is part of the allocated cell.
space->bitmap.SetBits(bitno - 1, n + 1);
if (OBJ_IS_MUTABLE_OBJECT(L))
space->m_marked += n + 1;
else
space->i_marked += n + 1;
if ((PolyWord*)obj <= space->fullGCLowerLimit)
space->fullGCLowerLimit = (PolyWord*)obj-1;
if (OBJ_IS_WEAKREF_OBJECT(L))
{
// Add this to the limits for the containing area.
PolyWord *baseAddr = (PolyWord*)obj;
PolyWord *startAddr = baseAddr-1; // Must point AT length word.
PolyWord *endObject = baseAddr + n;
if (startAddr < space->lowestWeak) space->lowestWeak = startAddr;
if (endObject > space->highestWeak) space->highestWeak = endObject;
}
}
pt += n;
}
}
}
// Parallel task: rebuild the bitmap for one local space from the mark
// bits left by the mark phase.
static void CreateBitmapsTask(GCTaskId *, void *arg1, void *arg2)
{
    LocalMemSpace *space = (LocalMemSpace *)arg1;
    space->bitmap.ClearBits(0, space->spaceSize());
    SetBitmaps(space, space->bottom, space->top);
}
// Parallel task to check the marks on cells in the code area and
// turn them into byte areas if they are free.
// Parallel task: sweep one code space. Marked code cells have their mark
// bit cleared and are retained; unmarked cells are turned into byte
// (free) cells, merging adjacent free cells, and the space's firstFree /
// largestFree summaries are rebuilt.
static void CheckMarksOnCodeTask(GCTaskId *, void *arg1, void *arg2)
{
CodeSpace *space = (CodeSpace*)arg1;
#ifdef POLYML32IN64
// Skip the initial alignment word in 32-in-64.
PolyWord *pt = space->bottom+1;
#else
PolyWord *pt = space->bottom;
#endif
// Start (length-word address) and size of the free run being merged.
PolyWord *lastFree = 0;
POLYUNSIGNED lastFreeSpace = 0;
space->largestFree = 0;
space->firstFree = 0;
while (pt < space->top)
{
// pt is at the length word; the object itself is one word on.
PolyObject *obj = (PolyObject*)(pt+1);
// There should not be forwarding pointers
ASSERT(obj->ContainsNormalLengthWord());
POLYUNSIGNED L = obj->LengthWord();
POLYUNSIGNED length = OBJ_OBJECT_LENGTH(L);
if (L & _OBJ_GC_MARK)
{
// It's marked - retain it.
ASSERT(L & _OBJ_CODE_OBJ);
// The code area is read-only: write through the writable mapping.
space->writeAble(obj)->SetLengthWord(L & ~(_OBJ_GC_MARK)); // Clear the mark bit
lastFree = 0;
lastFreeSpace = 0;
}
#ifdef POLYML32IN64
else if (length == 0)
{
// We may have zero filler words to set the correct alignment.
// Merge them into a previously free area otherwise leave
// them if they're after something allocated.
if (lastFree + lastFreeSpace == pt)
{
lastFreeSpace += length + 1;
PolyObject *freeSpace = (PolyObject*)(lastFree + 1);
space->writeAble(freeSpace)->SetLengthWord(lastFreeSpace - 1, F_BYTE_OBJ);
}
}
#endif
else { // Turn it into a byte area i.e. free. It may already be free.
if (space->firstFree == 0) space->firstFree = pt;
space->headerMap.ClearBit(pt-space->bottom); // Remove the "header" bit
if (lastFree + lastFreeSpace == pt)
// Merge free spaces. Speeds up subsequent scans.
lastFreeSpace += length + 1;
else
{
lastFree = pt;
lastFreeSpace = length + 1;
}
PolyObject *freeSpace = (PolyObject*)(lastFree+1);
space->writeAble(freeSpace)->SetLengthWord(lastFreeSpace-1, F_BYTE_OBJ);
if (lastFreeSpace > space->largestFree) space->largestFree = lastFreeSpace;
}
pt += length+1;
}
}
void GCMarkPhase(void)
{
mainThreadPhase = MTP_GCPHASEMARK;
// Clear the mark counters and set the rescan limits.
for(std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
{
LocalMemSpace *lSpace = *i;
lSpace->i_marked = lSpace->m_marked = 0;
lSpace->fullGCRescanStart = lSpace->top;
lSpace->fullGCRescanEnd = lSpace->bottom;
}
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
{
CodeSpace *space = *i;
space->fullGCRescanStart = space->top;
space->fullGCRescanEnd = space->bottom;
}
MTGCProcessMarkPointers::MarkRoots();
gpTaskFarm->WaitForCompletion();
// Do we have to rescan because the mark stack overflowed?
bool rescan;
do {
rescan = MTGCProcessMarkPointers::RescanForStackOverflow();
gpTaskFarm->WaitForCompletion();
} while(rescan);
gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Mark");
// Turn the marks into bitmap entries.
for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
gpTaskFarm->AddWorkOrRunNow(&CreateBitmapsTask, *i, 0);
// Process the code areas.
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
gpTaskFarm->AddWorkOrRunNow(&CheckMarksOnCodeTask, *i, 0);
gpTaskFarm->WaitForCompletion(); // Wait for completion of the bitmaps
gMem.RemoveEmptyCodeAreas();
gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Bitmap");
uintptr_t totalLive = 0;
for(std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
{
LocalMemSpace *lSpace = *i;
if (! lSpace->isMutable) ASSERT(lSpace->m_marked == 0);
totalLive += lSpace->m_marked + lSpace->i_marked;
if (debugOptions & DEBUG_GC_ENHANCED)
Log("GC: Mark: %s space %p: %" POLYUFMT " immutable words marked, %" POLYUFMT " mutable words marked\n",
lSpace->spaceTypeString(), lSpace,
lSpace->i_marked, lSpace->m_marked);
}
if (debugOptions & DEBUG_GC)
Log("GC: Mark: Total live data %" POLYUFMT " words\n", totalLive);
}
// Set up the stacks.
// Allocate the per-thread mark stacks before the first collection.
// A zero thread count from the task farm still gets one stack.
void initialiseMarkerTables()
{
    unsigned threads = gpTaskFarm->ThreadCount();
    MTGCProcessMarkPointers::InitStatics(threads == 0 ? 1 : threads);
}
diff --git a/libpolyml/globals.h b/libpolyml/globals.h
index 5a0eb3a1..a634d946 100644
--- a/libpolyml/globals.h
+++ b/libpolyml/globals.h
@@ -1,428 +1,409 @@
/*
Title: Globals for the system.
Author: Dave Matthews, Cambridge University Computer Laboratory
Copyright David C. J. Matthews 2017-20
Copyright (c) 2000-7
Cambridge University Technical Services Limited
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _GLOBALS_H
#define _GLOBALS_H
/*
Poly words, pointers and cells (objects).
The garbage collector needs to be able to distinguish different uses of a
memory word. We need to be able find which words are pointers to other
objects and which are simple integers. The simple distinction is between
integers, which are tagged by having the bottom bit set, and Addresses
which are word aligned (bottom 2 bits zero on a 32 bit machine, bottom 3
bits on a 64 bit machine, bottom bit in 32-in-64).
Addresses always point to the start of cells. The preceding word of a
cell is the length word. This contains the
length of the cell in words in the low-order 3 (7 in native 64-bits)
bytes and a flag byte in the top byte. The flags give information about
the type of the object. The length word is also used by the garbage
collector and other object processors.
*/
#if HAVE_STDINT_H
# include
#endif
#if HAVE_INTTYPES_H
# ifndef __STDC_FORMAT_MACROS
# define __STDC_FORMAT_MACROS
# endif
# include
#elif (defined(_MSC_VER) && (_MSC_VER >= 1900))
// In VS 2015 and later we need to use
# include
#endif
#ifdef HAVE_STDDEF_H
# include
#endif
#define POLY_TAGSHIFT 1
#if (defined(_WIN32))
# include
#endif
#ifdef POLYML32IN64
typedef int32_t POLYSIGNED;
typedef uint32_t POLYUNSIGNED;
#define SIZEOF_POLYWORD 4
#else
typedef intptr_t POLYSIGNED;
typedef uintptr_t POLYUNSIGNED;
#define SIZEOF_POLYWORD SIZEOF_VOIDP
#endif
// libpolyml uses printf-style I/O instead of C++ standard IOstreams,
// so we need specifier to format POLYUNSIGNED/POLYSIGNED values.
#ifdef POLYML32IN64
#if (defined(PRIu32))
# define POLYUFMT PRIu32
# define POLYSFMT PRId32
#elif (defined(_MSC_VER))
# define POLYUFMT "lu"
# define POLYSFMT "ld"
#else
# define POLYUFMT "u"
# define POLYSFMT "d"
#endif
#elif (defined(PRIuPTR))
# define POLYUFMT PRIuPTR
# define POLYSFMT PRIdPTR
#elif (defined(_MSC_VER) && (SIZEOF_POLYWORD == 8))
# define POLYUFMT "llu"
# define POLYSFMT "lld"
#else
# define POLYUFMT "lu" // as before. Cross your fingers.
# define POLYSFMT "ld" // idem.
#endif
// We can use the C99 %zu in most cases except MingW since it uses
// the old msvcrt and that only supports C89.
#if (defined(_WIN32) && (! defined(_MSC_VER) || _MSC_VER < 1800))
# if (SIZEOF_VOIDP == 8)
# define PRI_SIZET PRIu64
# else
# define PRI_SIZET PRIu32
# endif
#else
# define PRI_SIZET "zu"
#endif
typedef unsigned char byte;
class PolyObject;
typedef PolyObject *POLYOBJPTR;
#ifdef POLYML32IN64
class PolyWord;
// Bases of the 32-in-64 heap and code areas; object pointers are held
// as 32-bit indexes relative to globalHeapBase.
extern PolyWord *globalHeapBase, *globalCodeBase;
typedef uint32_t POLYOBJECTPTR; // This is an index into globalHeapBase
// Tests whether a 64-bit value is in the range of the object pointers.
inline bool IsHeapAddress(void *addr) { return (uintptr_t)addr <= 0xffffffff; }
#else
// Natively, an object pointer is just the address itself.
typedef POLYOBJPTR POLYOBJECTPTR;
inline bool IsHeapAddress(void *) { return true; }
#endif
typedef byte *POLYCODEPTR;
// A PolyWord is the fundamental unit of the ML heap: either a tagged
// integer (lowest bit set, value in the remaining bits shifted by
// POLY_TAGSHIFT) or an address/object reference (lowest bit clear).
// In 32-in-64 builds the address form is an index from globalHeapBase;
// otherwise it is a native pointer.
class PolyWord {
public:
// Initialise to TAGGED(0). This is very rarely used.
PolyWord() { contents.unsignedInt = 1; }
// Integers need to be tagged.
static PolyWord TaggedInt(POLYSIGNED s) { return PolyWord((s << POLY_TAGSHIFT) | (POLYSIGNED)0x01); }
static PolyWord TaggedUnsigned(POLYUNSIGNED u) { return PolyWord((u << POLY_TAGSHIFT) | 0x01); }
static PolyWord FromStackAddr(PolyWord *sp) { return PolyWord(sp); }
static PolyWord FromCodePtr(POLYCODEPTR p) { return PolyWord(p); }
// Tests for the various cases.
// Tagged (small) integer: lowest bit is set.
bool IsTagged(void) const { return (contents.unsignedInt & 1) != 0; }
#ifndef POLYML32IN64
// In native 32-bit and 64-bit addresses are on word boundaries
bool IsDataPtr(void) const { return (contents.unsignedInt & (sizeof(PolyWord) - 1)) == 0; }
#else
// In 32-in-64 addresses are anything that isn't tagged.
bool IsDataPtr(void) const { return (contents.unsignedInt & 1) == 0; }
#ifdef POLYML32IN64DEBUG
static POLYOBJECTPTR AddressToObjectPtr(void *address);
#else
// Convert a native address into a heap index (word offset from globalHeapBase).
static POLYOBJECTPTR AddressToObjectPtr(void *address)
{ return (POLYOBJECTPTR)((PolyWord*)address - globalHeapBase); }
#endif
#endif
// Extract the various cases.
// Arithmetic shift right recovers the signed value of a tagged integer.
POLYSIGNED UnTagged(void) const { return contents.signedInt >> POLY_TAGSHIFT; }
POLYUNSIGNED UnTaggedUnsigned(void) const { return contents.unsignedInt >> POLY_TAGSHIFT; }
#ifdef POLYML32IN64
PolyWord(POLYOBJPTR p) { contents.objectPtr = AddressToObjectPtr(p); }
// "base" defaults to the global heap; a different base may be supplied.
PolyWord *AsStackAddr(PolyWord *base = globalHeapBase) const { return base + contents.objectPtr; }
POLYOBJPTR AsObjPtr(PolyWord *base = globalHeapBase) const { return (POLYOBJPTR)AsStackAddr(base); }
#else
// An object pointer can become a word directly.
PolyWord(POLYOBJPTR p) { contents.objectPtr = p; }
POLYOBJPTR AsObjPtr(PolyWord *base = 0) const { return contents.objectPtr; }
PolyWord *AsStackAddr(PolyWord *base=0) const { return (PolyWord *)contents.objectPtr; }
#endif
POLYCODEPTR AsCodePtr(void) const { return (POLYCODEPTR)AsObjPtr(); }
void *AsAddress(void)const { return AsCodePtr(); }
// There are a few cases where we need to store and extract untagged values
static PolyWord FromUnsigned(POLYUNSIGNED u) { return PolyWord(u); }
static PolyWord FromSigned(POLYSIGNED s) { return PolyWord(s); }
POLYUNSIGNED AsUnsigned(void) const { return contents.unsignedInt; }
POLYSIGNED AsSigned(void) const { return contents.signedInt; }
protected:
PolyWord(POLYSIGNED s) { contents.signedInt = s; }
PolyWord(POLYUNSIGNED u) { contents.unsignedInt = u; }
public:
// Equality is a bit-for-bit comparison of the contents.
bool operator == (PolyWord b) const { return contents.unsignedInt == b.contents.unsignedInt; }
bool operator != (PolyWord b) const { return contents.unsignedInt != b.contents.unsignedInt; }
protected:
#ifdef POLYML32IN64
PolyWord(PolyWord *sp) { contents.objectPtr = AddressToObjectPtr(sp); }
PolyWord(POLYCODEPTR p) { contents.objectPtr = AddressToObjectPtr(p); }
#else
PolyWord(PolyWord *sp) { contents.objectPtr = (PolyObject*)sp; }
PolyWord(POLYCODEPTR p) { contents.objectPtr = (PolyObject*)p; }
#endif
// All three views share the same storage; the low bit distinguishes
// tagged integers from pointers.
union {
POLYSIGNED signedInt; // A tagged integer - lowest bit set
POLYUNSIGNED unsignedInt; // A tagged integer - lowest bit set
POLYOBJECTPTR objectPtr; // Object pointer - lowest bit clear.
} contents;
};
//typedef PolyWord POLYWORD;
// Convenience predicates over PolyWord.
inline bool OBJ_IS_AN_INTEGER(const PolyWord & a) { return a.IsTagged(); }
inline bool OBJ_IS_DATAPTR(const PolyWord & a) { return a.IsDataPtr(); }
// The maximum tagged signed number is one less than 0x80 shifted into the top byte then shifted down
// by the tag shift.
#define MAXTAGGED (((POLYSIGNED)0x80 << (POLYSIGNED)(8*(sizeof(PolyWord)-1) -POLY_TAGSHIFT)) -1)
// Tag/untag helpers expressed as inline functions for type safety.
inline PolyWord TAGGED(POLYSIGNED a) { return PolyWord::TaggedInt(a); }
inline POLYSIGNED UNTAGGED(PolyWord a) { return a.UnTagged(); }
inline POLYUNSIGNED UNTAGGED_UNSIGNED(PolyWord a) { return a.UnTaggedUnsigned(); }
#define IS_INT(x) ((x).IsTagged())
/* length word flags */
// The flag byte lives in the most significant byte of a length word;
// _TOP_BYTE positions a flag value there.
#define OBJ_PRIVATE_FLAGS_SHIFT (8 * (sizeof(PolyWord) - 1))
#define _TOP_BYTE(x) ((POLYUNSIGNED)(x) << OBJ_PRIVATE_FLAGS_SHIFT)
// Bottom two bits define the content format.
// Zero bits mean ordinary word object containing addresses or tagged integers.
#define F_BYTE_OBJ 0x01 /* byte object (contains no pointers) */
#define F_CODE_OBJ 0x02 /* code object (mixed bytes and words) */
#define F_CLOSURE_OBJ 0x03 /* closure (32-in-64 only). First word is code addr. */
#define F_GC_MARK 0x04 // Used during the GC marking phase
#define F_NO_OVERWRITE 0x08 /* don't overwrite when loading - mutables only. */
// This bit is overloaded and has different meanings depending on what other bits are set.
// For byte objects it is the sign bit for arbitrary precision ints.
// For other data it indicates either that the object is a profile block or contains
// information for allocation profiling.
#define F_NEGATIVE_BIT 0x10 // Sign bit for arbitrary precision ints (byte segs only)
#define F_PROFILE_BIT 0x10 // Object has a profile pointer (word segs only)
#define F_WEAK_BIT 0x20 /* object contains weak references to option values. */
// The Weak bit is only used on mutables. The data sharing (sharedata.cpp) uses this with
// immutables to indicate that the length field is being used to store the "depth".
#define F_MUTABLE_BIT 0x40 /* object is mutable */
#define F_TOMBSTONE_BIT 0x80 // Object is a forwarding pointer
#define F_PRIVATE_FLAGS_MASK 0xFF
// Shifted bits
// The same flags positioned in the top byte of a length word.
#define _OBJ_BYTE_OBJ _TOP_BYTE(F_BYTE_OBJ) /* byte object (contains no pointers) */
#define _OBJ_CODE_OBJ _TOP_BYTE(F_CODE_OBJ) /* code object (mixed bytes and words) */
#define _OBJ_CLOSURE_OBJ _TOP_BYTE(F_CLOSURE_OBJ) // closure (32-in-64 only). First word is code addr.
#define _OBJ_GC_MARK _TOP_BYTE(F_GC_MARK) // Mark bit
#define _OBJ_NO_OVERWRITE _TOP_BYTE(F_NO_OVERWRITE) /* don't overwrite when loading - mutables only. */
#define _OBJ_NEGATIVE_BIT _TOP_BYTE(F_NEGATIVE_BIT) /* sign bit for arbitrary precision ints */
#define _OBJ_PROFILE_BIT _TOP_BYTE(F_PROFILE_BIT) /* object has a profile pointer (word segs only) */
#define _OBJ_WEAK_BIT _TOP_BYTE(F_WEAK_BIT)
#define _OBJ_MUTABLE_BIT _TOP_BYTE(F_MUTABLE_BIT) /* object is mutable */
#define _OBJ_TOMBSTONE_BIT _TOP_BYTE(F_TOMBSTONE_BIT) // object is a tombstone.
#define _OBJ_PRIVATE_FLAGS_MASK _TOP_BYTE(F_PRIVATE_FLAGS_MASK)
// The object length occupies every bit below the flag byte.
#define _OBJ_PRIVATE_LENGTH_MASK ((-1) ^ _OBJ_PRIVATE_FLAGS_MASK)
#define MAX_OBJECT_SIZE _OBJ_PRIVATE_LENGTH_MASK
//
// A length word is a "proper" length unless the tombstone bit is set.
inline bool OBJ_IS_LENGTH(POLYUNSIGNED L) { return ((L & _OBJ_TOMBSTONE_BIT) == 0); }
/* these should only be applied to proper length words */
/* discards GC flag, mutable bit and weak bit. */
inline byte GetTypeBits(POLYUNSIGNED L) { return (byte)(L >> OBJ_PRIVATE_FLAGS_SHIFT) & 0x03; }
inline POLYUNSIGNED OBJ_OBJECT_LENGTH(POLYUNSIGNED L) { return L & _OBJ_PRIVATE_LENGTH_MASK; }
inline bool OBJ_IS_BYTE_OBJECT(POLYUNSIGNED L) { return (GetTypeBits(L) == F_BYTE_OBJ); }
inline bool OBJ_IS_CODE_OBJECT(POLYUNSIGNED L) { return (GetTypeBits(L) == F_CODE_OBJ); }
inline bool OBJ_IS_CLOSURE_OBJECT(POLYUNSIGNED L) { return (GetTypeBits(L) == F_CLOSURE_OBJ); }
inline bool OBJ_IS_NO_OVERWRITE(POLYUNSIGNED L) { return ((L & _OBJ_NO_OVERWRITE) != 0); }
inline bool OBJ_IS_NEGATIVE(POLYUNSIGNED L) { return ((L & _OBJ_NEGATIVE_BIT) != 0); }
inline bool OBJ_HAS_PROFILE(POLYUNSIGNED L) { return ((L & _OBJ_PROFILE_BIT) != 0); }
inline bool OBJ_IS_MUTABLE_OBJECT(POLYUNSIGNED L) { return ((L & _OBJ_MUTABLE_BIT) != 0); }
inline bool OBJ_IS_WEAKREF_OBJECT(POLYUNSIGNED L) { return ((L & _OBJ_WEAK_BIT) != 0); }
/* Don't need to worry about whether shift is signed,
because OBJ_PRIVATE_USER_FLAGS_MASK removes the sign bit.
We don't want the GC bit (which should be 0) anyway.
*/
#define OBJ_PRIVATE_USER_FLAGS_MASK _TOP_BYTE(0x7F)
#define OBJ_IS_WORD_OBJECT(L) (GetTypeBits(L) == 0)
/* case 2 - forwarding pointer */
// During a GC an object's length word may be replaced by a forwarding
// ("tombstone") pointer, flagged by _OBJ_TOMBSTONE_BIT.  The address is
// stored shifted (left by 1 heap-index bit in 32-in-64, right by 2 bits
// natively) so it fits alongside the flag bit.
inline bool OBJ_IS_POINTER(POLYUNSIGNED L) { return (L & _OBJ_TOMBSTONE_BIT) != 0; }
#ifdef POLYML32IN64
inline PolyObject *OBJ_GET_POINTER(POLYUNSIGNED L) { return (PolyObject*)(globalHeapBase + ((L & ~_OBJ_TOMBSTONE_BIT) << 1)); }
inline POLYUNSIGNED OBJ_SET_POINTER(PolyObject *pt) { return PolyWord::AddressToObjectPtr(pt) >> 1 | _OBJ_TOMBSTONE_BIT; }
#else
inline PolyObject *OBJ_GET_POINTER(POLYUNSIGNED L) { return (PolyObject*)(( L & ~_OBJ_TOMBSTONE_BIT) <<2); }
inline POLYUNSIGNED OBJ_SET_POINTER(PolyObject *pt) { return ((POLYUNSIGNED)pt >> 2) | _OBJ_TOMBSTONE_BIT; }
#endif
// An object i.e. a piece of allocated memory in the heap. In the simplest case this is a
// tuple, a list cons cell, a string or a ref. Every object has a length word in the word before
// where its address points. The top byte of this contains flags.
class PolyObject {
public:
byte *AsBytePtr(void)const { return (byte*)this; }
PolyWord *AsWordPtr(void)const { return (PolyWord*)this; }
// The length word sits in the word immediately before the object.
POLYUNSIGNED LengthWord(void)const { return ((PolyWord*)this)[-1].AsUnsigned(); }
// Length in words, with the flag byte masked off.
POLYUNSIGNED Length(void)const { return OBJ_OBJECT_LENGTH(LengthWord()); }
// Get and set a word
PolyWord Get(POLYUNSIGNED i) const { return ((PolyWord*)this)[i]; }
void Set(POLYUNSIGNED i, PolyWord v) { ((PolyWord*)this)[i] = v; }
// Address of the i'th word of this object.
PolyWord *Offset(POLYUNSIGNED i) const { return ((PolyWord*)this)+i; }
// Create a length word from a length and the flags in the top byte.
void SetLengthWord(POLYUNSIGNED l, byte f)
{ ((POLYUNSIGNED*)this)[-1] = l | ((POLYUNSIGNED)f << OBJ_PRIVATE_FLAGS_SHIFT); }
void SetLengthWord(POLYUNSIGNED l) { ((PolyWord*)this)[-1] = PolyWord::FromUnsigned(l); }
// Predicates over the flag byte of the length word.
bool IsByteObject(void) const { return OBJ_IS_BYTE_OBJECT(LengthWord()); }
bool IsCodeObject(void) const { return OBJ_IS_CODE_OBJECT(LengthWord()); }
bool IsClosureObject(void) const { return OBJ_IS_CLOSURE_OBJECT(LengthWord()); }
bool IsWordObject(void) const { return OBJ_IS_WORD_OBJECT(LengthWord()); }
bool IsMutable(void) const { return OBJ_IS_MUTABLE_OBJECT(LengthWord()); }
bool IsWeakRefObject(void) const { return OBJ_IS_WEAKREF_OBJECT(LengthWord()); }
bool IsNoOverwriteObject(void) const { return OBJ_IS_NO_OVERWRITE(LengthWord()); }
// Forwarding (tombstone) pointer handling: the length word may be
// replaced by a pointer to the object's new location.
bool ContainsForwardingPtr(void) const { return OBJ_IS_POINTER(LengthWord()); }
PolyObject *GetForwardingPtr(void) const { return OBJ_GET_POINTER(LengthWord()); }
void SetForwardingPtr(PolyObject *newp) { ((PolyWord*)this)[-1] = PolyWord::FromUnsigned(OBJ_SET_POINTER(newp)); }
bool ContainsNormalLengthWord(void) const { return OBJ_IS_LENGTH(LengthWord()); }
- // Find the start of the constant section for a piece of code.
- // The first of these is really only needed because we may have objects whose length
- // words have been overwritten.
- void GetConstSegmentForCode(POLYUNSIGNED obj_length, PolyWord * &cp, POLYUNSIGNED &count) const
- {
- PolyWord *last_word = Offset(obj_length - 1); // Last word in the code
- cp = last_word + 1 + last_word->AsSigned() / sizeof(PolyWord);
- count = cp[-1].AsUnsigned();
- }
- void GetConstSegmentForCode(PolyWord * &cp, POLYUNSIGNED &count) const
- {
- GetConstSegmentForCode(Length(), cp, count);
- }
- PolyWord *ConstPtrForCode(void) const
- {
- PolyWord *cp; POLYUNSIGNED count;
- GetConstSegmentForCode(cp, count);
- return cp;
- }
// Follow a chain of forwarding pointers
// Recursion terminates at the first object whose length word is not a
// forwarding pointer.
PolyObject *FollowForwardingChain(void)
{
if (ContainsForwardingPtr())
return GetForwardingPtr()->FollowForwardingChain();
else return this;
}
};
// Stacks are native-words size even in 32-in-64.
// A stack slot: a full native word that may hold a PolyWord value, a
// return (code) address or an address of another stack slot.
union stackItem
{
stackItem(PolyWord v) { argValue = v.AsUnsigned(); }
// Default: a tagged zero.
stackItem() { argValue = TAGGED(0).AsUnsigned(); }
// These return the low order word.
PolyWord w()const { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); }
operator PolyWord () { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); }
POLYCODEPTR codeAddr; // Return addresses
stackItem* stackAddr; // Stack addresses
uintptr_t argValue; // Treat an address as an int
};
/* There was a problem with version 2.95 on Sparc/Solaris at least. The PolyObject
class has no members so classes derived from it e.g. ML_Cons_Cell should begin at
the beginning of the object. Later versions of GCC get this right. */
#if defined(__GNUC__) && (__GNUC__ <= 2)
#error Poly/ML requires GCC version 3 or newer
#endif
// Fetch the length word of the object a PolyWord points at.
inline POLYUNSIGNED GetLengthWord(PolyWord p) { return p.AsObjPtr()->LengthWord(); }
// Get the length of an object.
inline POLYUNSIGNED OBJECT_LENGTH(PolyWord p) { return OBJ_OBJECT_LENGTH(GetLengthWord(p)); }
// A list cell. This can be passed to or returned from certain RTS functions.
class ML_Cons_Cell: public PolyObject {
public:
PolyWord h; // head (first element)
PolyWord t; // tail (rest of the list)
#define ListNull (TAGGED(0))
// The empty list is represented by the tagged integer zero.
static bool IsNull(PolyWord p) { return p == ListNull; }
};
/* An exception packet. This contains an identifier (either a tagged integer for RTS
exceptions or the address of a mutable for those created within ML), a string
name for printing and an exception argument value. */
// NOTE(review): field order presumably mirrors the ML-side representation
// of an exception packet - confirm before reordering.
class PolyException: public PolyObject {
public:
PolyWord ex_id; /* Exc identifier */
PolyWord ex_name;/* Exc name */
PolyWord arg; /* Exc arguments */
PolyWord ex_location; // Location of "raise". Always zero for RTS exceptions.
};
typedef PolyException poly_exn;
/* Macro to round a number of bytes up to a number of words. */
#define WORDS(s) ((s+sizeof(PolyWord)-1)/sizeof(PolyWord))
/**********************************************************************
*
* Representation of option type.
*
**********************************************************************/
// NONE is the tagged integer zero.
#define NONE_VALUE (TAGGED(0))
/* SOME x is represented by a single word cell containing x. */
#if (defined(_WIN32))
/* Windows doesn't include 0x in %p format. */
#define ZERO_X "0x"
#else
#define ZERO_X ""
#endif
#endif
diff --git a/libpolyml/machine_dep.h b/libpolyml/machine_dep.h
index 5db83c26..2024f851 100644
--- a/libpolyml/machine_dep.h
+++ b/libpolyml/machine_dep.h
@@ -1,69 +1,104 @@
/*
Title: machine_dep.h - exports signature for machine_dep.c
Copyright (c) 2000
Cambridge University Technical Services Limited
Further development Copyright 2020-21 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _MACHINE_DEP_H
#define _MACHINE_DEP_H
// Forward declarations of RTS types used in this interface; their
// definitions live elsewhere in the runtime.
class ScanAddress;
class TaskData;
class SaveVecEntry;
// A Handle is a pointer to an entry in a save vector.
typedef SaveVecEntry *Handle;
class StackSpace;
// Machine architecture values.
// Returned by MachineDependent::MachineArchitecture().
typedef enum {
MA_Interpreted = 0,
MA_I386,
MA_X86_64,
MA_X86_64_32,
MA_Arm64,
MA_Arm64_32
} Architectures;
// Machine-dependent module.
// Abstract interface implemented once per target architecture (or by the
// interpreter): task-data creation, locating/scanning the constant area
// of code objects, and cache management.
class MachineDependent {
public:
virtual ~MachineDependent() {} // Keep the compiler happy
// Create the machine-specific task data object.
virtual TaskData *CreateTaskData(void) = 0;
virtual unsigned InitialStackSize(void) { return 128; } // Initial size of a stack
// Must be > 40 (i.e. 2*min_stack_check) + base area in each stack frame
+ // Find the start of the constant section for a piece of code.
+ // This is the default version which uses the whole of the last word as a
+ // byte offset.
+ // Normally the constant area is located within the code object and the offset is a small
+ // negative value. When creating position-independent code we need to put the constants in a
+ // separate area. We have to use a relative offset to the constants rather than an absolute
+ // address to ensure that the code is position-independent.
+ virtual void GetConstSegmentForCode(PolyObject *obj, POLYUNSIGNED obj_length, PolyWord*& cp, POLYUNSIGNED& count) const
+ {
+ PolyWord* last_word = obj->Offset(obj_length - 1); // Last word in the code
+ POLYSIGNED offset = last_word->AsSigned();
+ cp = last_word + 1 + offset / sizeof(PolyWord);
+ count = cp[-1].AsUnsigned();
+ }
+ void GetConstSegmentForCode(PolyObject* obj, PolyWord*& cp, POLYUNSIGNED& count) const
+ {
+ GetConstSegmentForCode(obj, obj->Length(), cp, count);
+ }
+ PolyWord* ConstPtrForCode(PolyObject* obj) const
+ {
+ PolyWord* cp; POLYUNSIGNED count;
+ GetConstSegmentForCode(obj, cp, count);
+ return cp;
+ }
+
/* ScanConstantsWithinCode - update addresses within a code segment.*/
- virtual void ScanConstantsWithinCode(PolyObject *addr, PolyObject *oldAddr, POLYUNSIGNED length, ScanAddress *process) {}
- void ScanConstantsWithinCode(PolyObject *addr, ScanAddress *process)
- { ScanConstantsWithinCode(addr, addr, addr->Length(), process); } // Common case
+ virtual void ScanConstantsWithinCode(PolyObject* addr, PolyObject* old, POLYUNSIGNED length,
+ PolyWord* newConstAddr, PolyWord* oldConstAddr, POLYUNSIGNED numConsts, ScanAddress* process) {}
+
+ void ScanConstantsWithinCode(PolyObject* addr, POLYUNSIGNED length, ScanAddress* process)
+ {
+ PolyWord* constAddr;
+ POLYUNSIGNED count;
+ GetConstSegmentForCode(addr, length, constAddr, count);
+ ScanConstantsWithinCode(addr, addr, length, constAddr, constAddr, count, process);
+ }
+
+ void ScanConstantsWithinCode(PolyObject* addr, ScanAddress* process)
+ { ScanConstantsWithinCode(addr, addr->Length(), process); } // Common case
// Default is a no-op; overridden by architectures that must flush the
// instruction cache after writing code.
virtual void FlushInstructionCache(void *p, POLYUNSIGNED bytes) {}
virtual Architectures MachineArchitecture(void) = 0;
// Default does nothing; overridden by implementations that need it.
virtual void SetBootArchitecture(char arch, unsigned wordLength) {}
};
// Global access point to the machine-dependent implementation.
extern MachineDependent *machineDependent;
// Table of machine-specific RTS entry points.
extern struct _entrypts machineSpecificEPT[];
#endif /* _MACHINE_DEP_H */
diff --git a/libpolyml/machoexport.cpp b/libpolyml/machoexport.cpp
index f492ce56..02121004 100644
--- a/libpolyml/machoexport.cpp
+++ b/libpolyml/machoexport.cpp
@@ -1,527 +1,541 @@
/*
Title: Write out a database as a Mach object file
Author: David Matthews.
- Copyright (c) 2006-7, 2011-2, 2016-18, 2020 David C. J. Matthews
+ Copyright (c) 2006-7, 2011-2, 2016-18, 2020-21 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#ifdef HAVE_STDLIB_H
#include
#endif
#ifdef HAVE_STDIO_H
#include
#endif
#ifdef HAVE_STDDEF_H
#include
#endif
#ifdef HAVE_UNISTD_H
#include
#endif
#ifdef HAVE_ERRNO_H
#include
#endif
#ifdef HAVE_TIME_H
#include
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
// If we haven't got the Mach header files we shouldn't be building this.
#include
#include
#include
#include
#ifdef HAVE_STRING_H
#include
#endif
#ifdef HAVE_SYS_UTSNAME_H
#include
#endif
#include "globals.h"
#include "diagnostics.h"
#include "sys.h"
#include "machine_dep.h"
#include "gc.h"
#include "mpoly.h"
#include "scanaddrs.h"
#include "machoexport.h"
#include "run_time.h"
#include "version.h"
#include "polystring.h"
#include "timing.h"
// Mach-O seems to require each section to have a discrete virtual address range
// so we have to adjust various offsets to fit.
// Convert an offset within a memory-table area into an offset within the
// exported image.  Area "memTableEntries" denotes the descriptor tables
// themselves, which sit at the start and therefore need no displacement.
void MachoExport::adjustOffset(unsigned area, size_t &offset)
{
    if (area == memTableEntries)
        return; // Descriptor section: offset is already correct.
    // Skip the export description and the memory-table entries...
    size_t displacement = sizeof(exportDescription) + sizeof(memoryTableEntry) * memTableEntries;
    // ...plus the length of every area that precedes this one.
    for (unsigned a = 0; a < area; a++)
        displacement += memTable[a].mtLength;
    offset += displacement;
}
// Record a reference to an external (undefined) symbol: add the name to
// the extern string table and emit an extern relocation against the next
// free symbol index.
void MachoExport::addExternalReference(void *relocAddr, const char *name, bool /*isFuncPtr*/)
{
    externTable.makeEntry(name);
    unsigned thisSymbol = symbolNum;
    symbolNum = thisSymbol + 1;
    writeRelocation(0, relocAddr, thisSymbol, true);
}
// Generate the address relative to the start of the segment.
// The relocation field receives the displacement of p from the base of
// the memory-table area that contains it.
void MachoExport::setRelocationAddress(void *p, int32_t *reloc)
{
    unsigned containingArea = findArea(p);
    char *areaBase = (char*)memTable[containingArea].mtOriginalAddr;
    *reloc = (int32_t)((char*)p - areaBase);
}
/* Get the index corresponding to an address. */
// Emit a section-relative relocation for the word at relocAddr that
// refers to p, and return the target's offset within its section so the
// caller can store it in place of the absolute address.
PolyWord MachoExport::createRelocation(PolyWord p, void *relocAddr)
{
    void *target = p.AsAddress();
    unsigned targetArea = findArea(target);
    // Offset of the target within its area, adjusted to an image offset.
    size_t targetOffset = (char*)target - (char*)memTable[targetArea].mtOriginalAddr;
    adjustOffset(targetArea, targetOffset);
    // Mach-O section numbers count from 1.
    return writeRelocation(targetOffset, relocAddr, targetArea + 1, false);
}
// Emit one relocation_info record for an absolute (non-pc-relative)
// reference and return "offset" as a PolyWord so the caller can store it
// in the word being relocated.  symbolNumber is a section number, or an
// index into the symbol table when isExtern is true.
PolyWord MachoExport::writeRelocation(POLYUNSIGNED offset, void *relocAddr, unsigned symbolNumber, bool isExtern)
{
// It looks as though struct relocation_info entries are only used
// with GENERIC_RELOC_VANILLA types.
struct relocation_info relInfo;
setRelocationAddress(relocAddr, &relInfo.r_address);
relInfo.r_symbolnum = symbolNumber;
relInfo.r_pcrel = 0;
#if (SIZEOF_VOIDP == 8)
relInfo.r_length = 3; // 8 bytes
relInfo.r_type = X86_64_RELOC_UNSIGNED;
#else
relInfo.r_length = 2; // 4 bytes
relInfo.r_type = GENERIC_RELOC_VANILLA;
#endif
relInfo.r_extern = isExtern ? 1 : 0;
fwrite(&relInfo, sizeof(relInfo), 1, exportFile);
// Keep the per-section relocation count up to date for the header.
relocationCount++;
return PolyWord::FromUnsigned(offset);
}
/* This is called for each constant within the code.
Print a relocation entry for the word and return a value that means
that the offset is saved in original word. */
-void MachoExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code)
+void MachoExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code, intptr_t displacement)
{
// In 32-in-64 builds this whole body is compiled out: constants are
// handled elsewhere.
#ifndef POLYML32IN64
-    PolyObject *p = GetConstantValue(addr, code);
+    PolyObject *p = GetConstantValue(addr, code, displacement);
if (p == 0)
return;
void *a = p;
unsigned aArea = findArea(a);
// Set the value at the address to the offset relative to the symbol.
size_t offset = (char*)a - (char*)memTable[aArea].mtOriginalAddr;
adjustOffset(aArea, offset);
switch (code)
{
case PROCESS_RELOC_DIRECT: // 32 bit address of target
{
struct relocation_info reloc;
setRelocationAddress(addr, &reloc.r_address);
reloc.r_symbolnum = aArea+1; // Section numbers start at 1
reloc.r_pcrel = 0;
#if (defined(HOSTARCHITECTURE_X86_64))
reloc.r_length = 3; // 8 bytes
reloc.r_type = X86_64_RELOC_UNSIGNED;
#else
reloc.r_length = 2; // 4 bytes
reloc.r_type = GENERIC_RELOC_VANILLA;
#endif
reloc.r_extern = 0; // r_symbolnum is a section number. It should be 1 if we make the IO area a common.
// Store the section-relative offset into the code, little-endian,
// one byte at a time.
for (unsigned i = 0; i < sizeof(PolyWord); i++)
{
addr[i] = (byte)(offset & 0xff);
offset >>= 8;
}
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
}
break;
#if (defined(HOSTARCHITECTURE_X86) || defined(HOSTARCHITECTURE_X86_64))
case PROCESS_RELOC_I386RELATIVE: // 32 bit relative address
{
unsigned addrArea = findArea(addr);
// If it's in the same area we don't need a relocation because the
// relative offset will be unchanged.
if (addrArea != aArea)
{
struct relocation_info reloc;
setRelocationAddress(addr, &reloc.r_address);
reloc.r_symbolnum = aArea+1; // Section numbers start at 1
reloc.r_pcrel = 1;
reloc.r_length = 2; // 4 bytes
#if (defined(HOSTARCHITECTURE_X86_64))
reloc.r_type = X86_64_RELOC_SIGNED;
#else
reloc.r_type = GENERIC_RELOC_VANILLA;
#endif
reloc.r_extern = 0; // r_symbolnum is a section number.
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
size_t addrOffset = (char*)addr - (char*)memTable[addrArea].mtOriginalAddr;
adjustOffset(addrArea, addrOffset);
// Make the stored value relative to the end of the 4-byte field.
offset -= addrOffset + 4;
for (unsigned i = 0; i < 4; i++)
{
addr[i] = (byte)(offset & 0xff);
offset >>= 8;
}
}
}
break;
#endif
default:
ASSERT(0); // Wrong type of relocation for this architecture.
}
#endif
}
// Set the file alignment.
// Pad the output file with zero bytes so the next write begins at a
// multiple of "align" bytes.  Does nothing if already aligned.
// "align" must be no larger than 32 (the size of the zero pad buffer).
void MachoExport::alignFile(int align)
{
    char pad[32] = {0}; // Maximum alignment
    // Use long: ftell returns long, and storing it in an int could
    // truncate the position for very large (>2GB) export files.
    long offset = ftell(exportFile);
    if ((offset % align) == 0) return;
    fwrite(&pad, align - (offset % align), 1, exportFile);
}
// Emit a relocation for a word inside the descriptor (structs) section.
// Unlike createRelocation this takes the target section number and the
// field offset directly, because the word being relocated is not inside
// any memory-table area.
void MachoExport::createStructsRelocation(unsigned sect, size_t offset)
{
struct relocation_info reloc;
reloc.r_address = offset;
reloc.r_symbolnum = sect+1; // Section numbers start at 1
reloc.r_pcrel = 0;
#if (SIZEOF_VOIDP == 8)
reloc.r_length = 3; // 8 bytes
reloc.r_type = X86_64_RELOC_UNSIGNED;
#else
reloc.r_length = 2; // 4 bytes
reloc.r_type = GENERIC_RELOC_VANILLA;
#endif
reloc.r_extern = 0; // r_symbolnum is a section number.
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
}
void MachoExport::exportStore(void)
{
PolyWord *p;
#if (SIZEOF_VOIDP == 8)
struct mach_header_64 fhdr;
struct segment_command_64 sHdr;
struct section_64 *sections = new section_64[memTableEntries+1];
size_t sectionSize = sizeof(section_64);
#else
struct mach_header fhdr;
struct segment_command sHdr;
struct section *sections = new section[memTableEntries+1];
size_t sectionSize = sizeof(section);
#endif
struct symtab_command symTab;
unsigned i;
// Write out initial values for the headers. These are overwritten at the end.
// File header
memset(&fhdr, 0, sizeof(fhdr));
fhdr.filetype = MH_OBJECT;
fhdr.ncmds = 2; // One for the segment and one for the symbol table.
fhdr.sizeofcmds = sizeof(sHdr) + sectionSize * (memTableEntries+1) + sizeof(symTab);
fhdr.flags = 0;
// The machine needs to match the machine we're compiling for
// even if this is actually portable code.
#if (SIZEOF_VOIDP == 8)
fhdr.magic = MH_MAGIC_64; // (0xfeedfacf) 64-bit magic number
#else
fhdr.magic = MH_MAGIC; // Feed Face (0xfeedface)
#endif
#if defined(HOSTARCHITECTURE_X86)
fhdr.cputype = CPU_TYPE_I386;
fhdr.cpusubtype = CPU_SUBTYPE_I386_ALL;
#elif defined(HOSTARCHITECTURE_PPC)
fhdr.cputype = CPU_TYPE_POWERPC;
fhdr.cpusubtype = CPU_SUBTYPE_POWERPC_ALL;
#elif defined(HOSTARCHITECTURE_X86_64)
fhdr.cputype = CPU_TYPE_X86_64;
fhdr.cpusubtype = CPU_SUBTYPE_X86_64_ALL;
#elif defined(HOSTARCHITECTURE_AARCH64)
fhdr.cputype = CPU_TYPE_ARM64;
fhdr.cpusubtype = CPU_SUBTYPE_ARM64_ALL;
#else
#error "No support for exporting on this architecture"
#endif
fwrite(&fhdr, sizeof(fhdr), 1, exportFile); // Write it for the moment.
symbolNum = 1; // The first symbol is poly_exports
// Segment header.
memset(&sHdr, 0, sizeof(sHdr));
#if (SIZEOF_VOIDP == 8)
sHdr.cmd = LC_SEGMENT_64;
#else
sHdr.cmd = LC_SEGMENT;
#endif
sHdr.nsects = memTableEntries+1; // One for each entry plus one for the tables.
sHdr.cmdsize = sizeof(sHdr) + sectionSize * sHdr.nsects;
// Add up the sections to give the file size
sHdr.filesize = 0;
for (i = 0; i < memTableEntries; i++)
sHdr.filesize += memTable[i].mtLength; // Do we need any alignment?
sHdr.filesize += sizeof(exportDescription) + memTableEntries * sizeof(memoryTableEntry);
sHdr.vmsize = sHdr.filesize; // Set them the same since we don't have any "common" area.
// sHdr.fileOff is set later.
sHdr.maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
sHdr.initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
sHdr.flags = 0;
// Write it initially.
fwrite(&sHdr, sizeof(sHdr), 1, exportFile);
// Section header for each entry in the table
POLYUNSIGNED sectAddr = sizeof(exportDescription)+sizeof(memoryTableEntry)*memTableEntries;
for (i = 0; i < memTableEntries; i++)
{
memset(&(sections[i]), 0, sectionSize);
if (memTable[i].mtFlags & MTF_WRITEABLE)
{
// Mutable areas
ASSERT(!(memTable[i].mtFlags & MTF_EXECUTABLE)); // Executable areas can't be writable.
sprintf(sections[i].sectname, "__data");
sprintf(sections[i].segname, "__DATA");
sections[i].flags = S_ATTR_LOC_RELOC | S_REGULAR;
}
#ifndef CODEISNOTEXECUTABLE
// Not if we're building the interpreted version.
else if (memTable[i].mtFlags & MTF_EXECUTABLE)
{
sprintf(sections[i].sectname, "__text");
sprintf(sections[i].segname, "__TEXT");
sections[i].flags = S_ATTR_LOC_RELOC | S_ATTR_SOME_INSTRUCTIONS | S_REGULAR;
}
#endif
else
{
sprintf(sections[i].sectname, "__const");
sprintf(sections[i].segname, "__DATA");
sections[i].flags = S_ATTR_LOC_RELOC | S_REGULAR;
}
sections[i].addr = sectAddr;
sections[i].size = memTable[i].mtLength;
sectAddr += memTable[i].mtLength;
//sections[i].offset is set later
//sections[i].reloff is set later
//sections[i].nreloc is set later
sections[i].align = 3; // 8 byte alignment
// theSection.size is set later
}
// For the tables.
memset(&(sections[memTableEntries]), 0, sectionSize);
sprintf(sections[memTableEntries].sectname, "__const");
sprintf(sections[memTableEntries].segname, "__DATA");
sections[memTableEntries].addr = 0;
sections[memTableEntries].size = sizeof(exportDescription)+sizeof(memoryTableEntry)*memTableEntries;
sections[memTableEntries].align = 3; // 8 byte alignment
// theSection.size is set later
sections[memTableEntries].flags = S_ATTR_LOC_RELOC | S_ATTR_SOME_INSTRUCTIONS | S_REGULAR;
// Write them out for the moment.
fwrite(sections, sectionSize * (memTableEntries+1), 1, exportFile);
// Symbol table header.
memset(&symTab, 0, sizeof(symTab));
symTab.cmd = LC_SYMTAB;
symTab.cmdsize = sizeof(symTab);
//symTab.symoff is set later
//symTab.nsyms is set later
//symTab.stroff is set later
//symTab.strsize is set later
fwrite(&symTab, sizeof(symTab), 1, exportFile);
// Create and write out the relocations.
for (i = 0; i < memTableEntries; i++)
{
sections[i].reloff = ftell(exportFile);
relocationCount = 0;
// Create the relocation table and turn all addresses into offsets.
char *start = (char*)memTable[i].mtOriginalAddr;
char *end = start + memTable[i].mtLength;
for (p = (PolyWord*)start; p < (PolyWord*)end; )
{
p++;
PolyObject *obj = (PolyObject*)p;
POLYUNSIGNED length = obj->Length();
if (length != 0 && obj->IsCodeObject())
+ {
+ POLYUNSIGNED constCount;
+ PolyWord* cp;
+ // Get the constant area pointer first because ScanConstantsWithinCode
+ // may alter it.
+ machineDependent->GetConstSegmentForCode(obj, cp, constCount);
+ // Update any constants before processing the object
+ // We need that for relative jumps/calls in X86/64.
machineDependent->ScanConstantsWithinCode(obj, this);
- relocateObject(obj);
+ if (cp > (PolyWord*)obj && cp < ((PolyWord*)obj) + length)
+ {
+ // Process the constants if they're in the area but not if they've been moved.
+ for (POLYUNSIGNED i = 0; i < constCount; i++) relocateValue(&(cp[i]));
+ }
+ }
+ else relocateObject(obj);
p += length;
}
sections[i].nreloc = relocationCount;
}
// Additional relocations for the descriptors.
sections[memTableEntries].reloff = ftell(exportFile);
relocationCount = 0;
// Address of "memTable" within "exports". We can't use createRelocation because
// the position of the relocation is not in either the mutable or the immutable area.
createStructsRelocation(memTableEntries, offsetof(exportDescription, memTable));
// Address of "rootFunction" within "exports"
unsigned rootAddrArea = findArea(rootFunction);
size_t rootOffset = (char*)rootFunction - (char*)memTable[rootAddrArea].mtOriginalAddr;
adjustOffset(rootAddrArea, rootOffset);
createStructsRelocation(rootAddrArea, offsetof(exportDescription, rootFunction));
// Addresses of the areas within memtable.
for (i = 0; i < memTableEntries; i++)
{
createStructsRelocation(i,
sizeof(exportDescription) + i * sizeof(memoryTableEntry) + offsetof(memoryTableEntry, mtCurrentAddr));
}
sections[memTableEntries].nreloc = relocationCount;
// The symbol table.
symTab.symoff = ftell(exportFile);
// Global symbols: Just one.
{
#if (SIZEOF_VOIDP == 8)
struct nlist_64 symbol;
#else
struct nlist symbol;
#endif
memset(&symbol, 0, sizeof(symbol)); // Zero unused fields
symbol.n_un.n_strx = stringTable.makeEntry("_poly_exports");
symbol.n_type = N_EXT | N_SECT;
symbol.n_sect = memTableEntries+1; // Sections count from 1.
symbol.n_desc = REFERENCE_FLAG_DEFINED;
fwrite(&symbol, sizeof(symbol), 1, exportFile);
}
// External references.
for (unsigned i = 0; i < externTable.stringSize; i += (unsigned)strlen(externTable.strings+i) + 1)
{
const char *symbolName = externTable.strings+i;
#if (SIZEOF_VOIDP == 8)
struct nlist_64 symbol;
#else
struct nlist symbol;
#endif
memset(&symbol, 0, sizeof(symbol)); // Zero unused fields
// Have to add an underscore to the symbols.
TempCString fullSymbol;
fullSymbol = (char*)malloc(strlen(symbolName) + 2);
if (fullSymbol == 0) throw MemoryException();
sprintf(fullSymbol, "_%s", symbolName);
symbol.n_un.n_strx = stringTable.makeEntry(fullSymbol);
symbol.n_type = N_EXT | N_UNDF;
symbol.n_sect = NO_SECT;
symbol.n_desc = REFERENCE_FLAG_UNDEFINED_NON_LAZY;
fwrite(&symbol, sizeof(symbol), 1, exportFile);
}
symTab.nsyms = symbolNum;
// The symbol name table
symTab.stroff = ftell(exportFile);
fwrite(stringTable.strings, stringTable.stringSize, 1, exportFile);
symTab.strsize = stringTable.stringSize;
alignFile(4);
exportDescription exports;
memset(&exports, 0, sizeof(exports));
exports.structLength = sizeof(exportDescription);
exports.memTableSize = sizeof(memoryTableEntry);
exports.memTableEntries = memTableEntries;
exports.memTable = (memoryTableEntry *)sizeof(exportDescription); // It follows immediately after this.
// Set the value to be the offset relative to the base of the area. We have set a relocation
// already which will add the base of the area.
exports.rootFunction = (void*)rootOffset;
exports.timeStamp = getBuildTime();
exports.architecture = machineDependent->MachineArchitecture();
exports.rtsVersion = POLY_version_number;
#ifdef POLYML32IN64
exports.originalBaseAddr = globalHeapBase;
#else
exports.originalBaseAddr = 0;
#endif
sections[memTableEntries].offset = ftell(exportFile);
fwrite(&exports, sizeof(exports), 1, exportFile);
size_t addrOffset = sizeof(exports)+sizeof(memoryTableEntry)*memTableEntries;
for (i = 0; i < memTableEntries; i++)
{
void *save = memTable[i].mtCurrentAddr;
memTable[i].mtCurrentAddr = (void*)addrOffset; // Set this to the relative address.
addrOffset += memTable[i].mtLength;
fwrite(&memTable[i], sizeof(memoryTableEntry), 1, exportFile);
memTable[i].mtCurrentAddr = save;
}
// Now the binary data.
for (i = 0; i < memTableEntries; i++)
{
alignFile(4);
sections[i].offset = ftell(exportFile);
fwrite(memTable[i].mtOriginalAddr, 1, memTable[i].mtLength, exportFile);
}
// Rewind to rewrite the headers with the actual offsets.
rewind(exportFile);
fwrite(&fhdr, sizeof(fhdr), 1, exportFile); // File header
fwrite(&sHdr, sizeof(sHdr), 1, exportFile); // Segment header
fwrite(sections, sectionSize * (memTableEntries+1), 1, exportFile); // Section headers
fwrite(&symTab, sizeof(symTab), 1, exportFile); // Symbol table header
fclose(exportFile); exportFile = NULL;
delete[](sections);
}
diff --git a/libpolyml/machoexport.h b/libpolyml/machoexport.h
index 260c2ad3..6c2fe27f 100644
--- a/libpolyml/machoexport.h
+++ b/libpolyml/machoexport.h
@@ -1,62 +1,66 @@
/*
Title: Export memory as a Mach object file
Author: David C. J. Matthews.
- Copyright (c) 2006,2016-18 David C. J. Matthews
+ Copyright (c) 2006, 2016-18, 2020 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef MachExport_H_INCLUDED
#define MachExport_H_INCLUDED
#include "config.h"
#include "scanaddrs.h" // For base class
#include "exporter.h"
#include
class MachoExport: public Exporter, public ScanAddress
{
public:
MachoExport(): relocationCount(0), symbolNum(0) {}
public:
virtual void exportStore(void);
private:
// ScanAddress overrides
- virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code);
+ virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code, intptr_t displacement);
// At the moment we should only get calls to ScanConstant.
virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
virtual void addExternalReference(void *addr, const char *name, bool isFuncPtr);
+ virtual void RelocateOnly(PolyObject* base, byte* addressOfConstant, ScanRelocationKind code)
+ {
+ ScanConstant(base, addressOfConstant, code, 0);
+ }
private:
void setRelocationAddress(void *p, int32_t *reloc);
PolyWord createRelocation(PolyWord p, void *relocAddr);
PolyWord writeRelocation(POLYUNSIGNED offset, void *relocAddr, unsigned symbolNumber, bool isExtern);
void alignFile(int align);
void createStructsRelocation(unsigned area, size_t offset);
void adjustOffset(unsigned area, size_t &offset);
unsigned relocationCount;
ExportStringTable stringTable;
// Table and count for external references.
ExportStringTable externTable;
unsigned symbolNum;
};
#endif
diff --git a/libpolyml/memmgr.cpp b/libpolyml/memmgr.cpp
index f461c5e2..cc99c9f0 100644
--- a/libpolyml/memmgr.cpp
+++ b/libpolyml/memmgr.cpp
@@ -1,1388 +1,1392 @@
/*
Title: memmgr.cpp Memory segment manager
Copyright (c) 2006-7, 2011-12, 2016-18 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#include
#include
#include "globals.h"
#include "memmgr.h"
#include "osmem.h"
#include "scanaddrs.h"
#include "bitmap.h"
#include "mpoly.h"
#include "diagnostics.h"
#include "statistics.h"
#include "processes.h"
#include "machine_dep.h"
#ifdef POLYML32IN64
// This contains the address of the base of the heap.
PolyWord *globalHeapBase, *globalCodeBase;
#endif
// heap resizing policy option requested on command line
unsigned heapsizingOption = 0;
// If we are building for the interpreted version we don't need or want the
// code to be executable.
static const enum OSMem::_MemUsage executableCodeWhereNecessary =
#ifdef CODEISNOTEXECUTABLE
OSMem::UsageData;
#else
OSMem::UsageExecutableCode;
#endif
// Base-class constructor: starts as an empty, immutable, non-code
// permanent space; concrete subclasses/callers fill in the real fields.
MemSpace::MemSpace(OSMem *alloc): SpaceTree(true)
{
    spaceType = ST_PERMANENT;
    isMutable = false;
    bottom = 0;
    top = 0;
    isCode = false;
    allocator = alloc; // May be null: then this object does not own/free the memory.
    shadowSpace = 0;
}
// Free the backing memory if this space owns an allocator and was ever mapped.
// Code spaces may carry a separate writable "shadow" mapping which must be
// released together with the executable mapping.
MemSpace::~MemSpace()
{
    if (allocator != 0 && bottom != 0)
    {
        if (isCode)
            allocator->FreeCodeArea(bottom, shadowSpace, (char*)top - (char*)bottom);
        else allocator->FreeDataArea(bottom, (char*)top - (char*)bottom);
    }
}
// A space that can be scanned/marked by the GC; carries a per-space lock.
MarkableSpace::MarkableSpace(OSMem *alloc): MemSpace(alloc), spaceLock("Local space")
{
}
// Local (GC-managed) space: all pointers zeroed here; the real layout is
// set up later by InitSpace once the memory has been allocated.
LocalMemSpace::LocalMemSpace(OSMem *alloc): MarkableSpace(alloc)
{
    spaceType = ST_LOCAL;
    upperAllocPtr = lowerAllocPtr = 0;
    for (unsigned i = 0; i < NSTARTS; i++)
        start[i] = 0;
    start_index = 0;
    i_marked = m_marked = updated = 0;
    allocationSpace = false;
}
// Initialise a local space over [heapSpace, heapSpace+size).
// Returns false if the mark bitmap cannot be created.
bool LocalMemSpace::InitSpace(PolyWord *heapSpace, uintptr_t size, bool mut)
{
    isMutable = mut;
    bottom = heapSpace;
    top = bottom + size;
    // Initialise all the fields. The partial GC in particular relies on this.
    // Upper-area pointers start at "top", lower-area pointers at "bottom".
    upperAllocPtr = partialGCTop = fullGCRescanStart = fullGCLowerLimit = lowestWeak = top;
    lowerAllocPtr = partialGCScan = partialGCRootBase = partialGCRootTop =
        fullGCRescanEnd = highestWeak = bottom;
#ifdef POLYML32IN64
    // The address must be on an odd-word boundary so that after the length
    // word is put in the actual cell address is on an even-word boundary.
    lowerAllocPtr[0] = PolyWord::FromUnsigned(0);
    lowerAllocPtr = bottom + 1;
#endif
    spaceOwner = 0;
    allocationSpace = false;
    // Bitmap for the space.
    return bitmap.Create(size);
}
// Construct the memory manager with empty space tables and default sizes.
MemMgr::MemMgr(): allocLock("Memmgr alloc"), codeBitmapLock("Code bitmap")
{
    nextIndex = 0;
    reservedSpace = 0;
    nextAllocator = 0;
    defaultSpaceSize = 0; // NOTE(review): overwritten unconditionally below; redundant.
    spaceBeforeMinorGC = 0;
    spaceForHeap = 0;
    currentAllocSpace = currentHeapSize = 0;
    defaultSpaceSize = 1024 * 1024 / sizeof(PolyWord); // 1Mbyte segments.
    spaceTree = new SpaceTreeTree;
}
// Delete every space.  The B-tree is deleted first because its entries refer
// to the spaces being destroyed.
// Fix: the std::vector template arguments had been stripped from the
// iterator declarations; restored from the element types used elsewhere
// (pSpaces/eSpaces hold PermanentMemSpace*, lSpaces LocalMemSpace*,
// sSpaces StackSpace*, cSpaces CodeSpace*).
MemMgr::~MemMgr()
{
    delete(spaceTree); // Have to do this before we delete the spaces.
    for (std::vector<PermanentMemSpace*>::iterator i = pSpaces.begin(); i < pSpaces.end(); i++)
        delete(*i);
    for (std::vector<LocalMemSpace*>::iterator i = lSpaces.begin(); i < lSpaces.end(); i++)
        delete(*i);
    for (std::vector<PermanentMemSpace*>::iterator i = eSpaces.begin(); i < eSpaces.end(); i++)
        delete(*i);
    for (std::vector<StackSpace*>::iterator i = sSpaces.begin(); i < sSpaces.end(); i++)
        delete(*i);
    for (std::vector<CodeSpace*>::iterator i = cSpaces.begin(); i < cSpaces.end(); i++)
        delete(*i);
}
bool MemMgr::Initialise()
{
#ifdef POLYML32IN64
- // Allocate a single 16G area but with no access.
+ // Reserve a single 16G area but with no access.
void *heapBase;
if (!osHeapAlloc.Initialise(OSMem::UsageData, (size_t)16 * 1024 * 1024 * 1024, &heapBase))
return false;
globalHeapBase = (PolyWord*)heapBase;
-
// Allocate a 4 gbyte area for the stacks.
// It's important that the stack and code areas have addresses with
// non-zero top 32-bits.
if (!osStackAlloc.Initialise(OSMem::UsageStack, (size_t)4 * 1024 * 1024 * 1024))
return false;
-
- // Allocate a 2G area for the code.
- void *codeBase;
+#else
+ if (!osHeapAlloc.Initialise(OSMem::UsageData) || !osStackAlloc.Initialise(OSMem::UsageStack))
+ return false;
+#endif
+#if (defined(POLYML32IN64) || defined(HOSTARCHITECTURE_X86_64))
+ // Reserve a 2G area for the code.
+ void* codeBase;
if (!osCodeAlloc.Initialise(executableCodeWhereNecessary,
- (size_t)2 * 1024 * 1024 * 1024, &codeBase))
+ (size_t)2 * 1024 * 1024 * 1024, &codeBase))
return false;
+#ifdef POLYML32IN64
globalCodeBase = (PolyWord*)codeBase;
+#endif
return true;
#else
- return osHeapAlloc.Initialise(OSMem::UsageData) && osStackAlloc.Initialise(OSMem::UsageStack) &&
- osCodeAlloc.Initialise(executableCodeWhereNecessary);
+ return osCodeAlloc.Initialise(executableCodeWhereNecessary);
#endif
}
// Create and initialise a new local space and add it to the table.
// Create and initialise a new local space and add it to the table.
// Returns 0 on failure (the partially built space is deleted in that case).
LocalMemSpace* MemMgr::NewLocalSpace(uintptr_t size, bool mut)
{
    try {
        LocalMemSpace *space = new LocalMemSpace(&osHeapAlloc);
        // Before trying to allocate the heap temporarily allocate the
        // reserved space. This ensures that this much space will always
        // be available for C stacks and the C++ heap.
        void *reservation = 0;
        size_t rSpace = reservedSpace*sizeof(PolyWord);
        if (reservedSpace != 0) {
            reservation = osHeapAlloc.AllocateDataArea(rSpace);
            if (reservation == NULL) {
                // Insufficient space for the reservation. Can't allocate this local space.
                if (debugOptions & DEBUG_MEMMGR)
                    Log("MMGR: New local %smutable space: insufficient reservation space\n", mut ? "": "im");
                delete space;
                return 0;
            }
        }
        // Allocate the heap itself.
        size_t iSpace = size * sizeof(PolyWord);
        PolyWord* heapSpace = (PolyWord*)osHeapAlloc.AllocateDataArea(iSpace);
        // The size may have been rounded up to a block boundary.
        size = iSpace / sizeof(PolyWord);
        bool success = heapSpace != 0 && space->InitSpace(heapSpace, size, mut) && AddLocalSpace(space);
        // The heap (or nothing) is now allocated: release the temporary reservation.
        if (reservation != 0) osHeapAlloc.FreeDataArea(reservation, rSpace);
        if (success)
        {
            if (debugOptions & DEBUG_MEMMGR)
                Log("MMGR: New local %smutable space %p, size=%luk words, bottom=%p, top=%p\n", mut ? "": "im",
                    space, space->spaceSize()/1024, space->bottom, space->top);
            currentHeapSize += space->spaceSize();
            globalStats.setSize(PSS_TOTAL_HEAP, currentHeapSize * sizeof(PolyWord));
            return space;
        }
        // If something went wrong.
        delete space;
        if (debugOptions & DEBUG_MEMMGR)
            Log("MMGR: New local %smutable space: insufficient space\n", mut ? "": "im");
        return 0;
    }
    catch (std::bad_alloc&) {
        if (debugOptions & DEBUG_MEMMGR)
            Log("MMGR: New local %smutable space: \"new\" failed\n", mut ? "": "im");
        return 0;
    }
}
// Create a local space for initial allocation.
// Create a mutable local space flagged for initial (nursery) allocation
// and account for it in the allocation statistics.  Returns 0 on failure.
LocalMemSpace *MemMgr::CreateAllocationSpace(uintptr_t size)
{
    LocalMemSpace *result = NewLocalSpace(size, true);
    if (result)
    {
        result->allocationSpace = true;
        currentAllocSpace += result->spaceSize();
        globalStats.incSize(PSS_ALLOCATION, result->spaceSize()*sizeof(PolyWord));
        globalStats.incSize(PSS_ALLOCATION_FREE, result->freeSpace()*sizeof(PolyWord));
    }
    return result;
}
// If an allocation space has a lot of data left in it after a GC, particularly
// a single large object we should turn it into a local area.
// If an allocation space has a lot of data left in it after a GC, particularly
// a single large object, turn it into an ordinary local area so it stops
// counting against the allocation-space budget.
void MemMgr::ConvertAllocationSpaceToLocal(LocalMemSpace *space)
{
    ASSERT(space->allocationSpace);
    space->allocationSpace = false;
    // Currently it is left as a mutable area but if the contents are all
    // immutable e.g. a large vector it could be better to turn it into an
    // immutable area.
    currentAllocSpace -= space->spaceSize();
}
// Add a local memory space to the table.
// Add a local memory space to the table, keeping the ordering the full GC
// relies on: immutable spaces first, then mutable, then allocation spaces.
// Fix: the std::vector template arguments had been stripped from the two
// iterator declarations; restored as LocalMemSpace* (the lSpaces element type).
bool MemMgr::AddLocalSpace(LocalMemSpace *space)
{
    // Update the B-tree first; undo it if the vector insert fails.
    try {
        AddTree(space);
        // The entries in the local table are ordered so that the copy phase of the full
        // GC simply has to copy to an entry earlier in the table. Immutable spaces come
        // first, followed by mutable spaces and finally allocation spaces.
        if (space->allocationSpace)
            lSpaces.push_back(space); // Just add at the end
        else if (space->isMutable)
        {
            // Add before the allocation spaces
            std::vector<LocalMemSpace*>::iterator i = lSpaces.begin();
            while (i != lSpaces.end() && ! (*i)->allocationSpace) i++;
            lSpaces.insert(i, space);
        }
        else
        {
            // Immutable space: Add before the mutable spaces
            std::vector<LocalMemSpace*>::iterator i = lSpaces.begin();
            while (i != lSpaces.end() && ! (*i)->isMutable) i++;
            lSpaces.insert(i, space);
        }
    }
    catch (std::bad_alloc&) {
        RemoveTree(space);
        return false;
    }
    return true;
}
// Create an entry for a permanent space.
// Create an entry for a permanent space over memory the caller has already
// mapped at [base, base+words).  Returns 0 on allocation failure.
PermanentMemSpace* MemMgr::NewPermanentSpace(PolyWord *base, uintptr_t words,
                                             unsigned flags, unsigned index, unsigned hierarchy /*= 0*/)
{
    try {
        PermanentMemSpace *space = new PermanentMemSpace(0/* Not freed */);
        space->bottom = base;
        space->topPointer = space->top = space->bottom + words;
        space->spaceType = ST_PERMANENT;
        // Decode the MTF_* flag bits into the space's properties.
        space->isMutable = flags & MTF_WRITEABLE ? true : false;
        space->noOverwrite = flags & MTF_NO_OVERWRITE ? true : false;
        space->byteOnly = flags & MTF_BYTES ? true : false;
        space->isCode = flags & MTF_EXECUTABLE ? true : false;
        space->index = index;
        space->hierarchy = hierarchy;
        // Keep nextIndex above every index in use.
        if (index >= nextIndex) nextIndex = index+1;
        // Extend the permanent memory table and add this space to it.
        try {
            AddTree(space);
            pSpaces.push_back(space);
        }
        catch (std::exception&) {
            RemoveTree(space);
            delete space;
            return 0;
        }
        return space;
    }
    catch (std::bad_alloc&) {
        return 0;
    }
}
// Allocate fresh memory for a permanent space (used when loading a saved
// state) and register it.  Returns 0 on failure.
// Fix: this block contained an unresolved diff pair for the allocator
// selection line; resolved to the post-image, which casts both allocators
// to their common base so the conditional expression type-checks.
PermanentMemSpace *MemMgr::AllocateNewPermanentSpace(uintptr_t byteSize, unsigned flags, unsigned index, unsigned hierarchy)
{
    try {
        // Code gets the executable allocator; everything else the data allocator.
        OSMem *alloc = flags & MTF_EXECUTABLE ? (OSMem*)&osCodeAlloc : (OSMem*)&osHeapAlloc;
        PermanentMemSpace *space = new PermanentMemSpace(alloc);
        size_t actualSize = byteSize; // May be rounded up by the allocator.
        PolyWord* base;
        void* newShadow=0;
        if (flags & MTF_EXECUTABLE)
            base = (PolyWord*)alloc->AllocateCodeArea(actualSize, newShadow);
        else base = (PolyWord*)alloc->AllocateDataArea(actualSize);
        if (base == 0)
        {
            delete(space);
            return 0;
        }
        space->bottom = base;
        space->shadowSpace = (PolyWord*)newShadow;
        space->topPointer = space->top = space->bottom + actualSize/sizeof(PolyWord);
        space->spaceType = ST_PERMANENT;
        // Decode the MTF_* flag bits into the space's properties.
        space->isMutable = flags & MTF_WRITEABLE ? true : false;
        space->noOverwrite = flags & MTF_NO_OVERWRITE ? true : false;
        space->byteOnly = flags & MTF_BYTES ? true : false;
        space->isCode = flags & MTF_EXECUTABLE ? true : false;
        space->index = index;
        space->hierarchy = hierarchy;
        if (index >= nextIndex) nextIndex = index + 1;
        // Extend the permanent memory table and add this space to it.
        try {
            AddTree(space);
            pSpaces.push_back(space);
        }
        catch (std::exception&) {
            RemoveTree(space);
            delete space;
            return 0;
        }
        return space;
    }
    catch (std::bad_alloc&) {
        return 0;
    }
}
// Called when a permanent space has been fully written: write-protect it
// unless it needs to remain mutable.
bool MemMgr::CompletePermanentSpaceAllocation(PermanentMemSpace *space)
{
    // Remove write access unless it is mutable.
    // Don't remove write access unless this is top-level. Share-data assumes only hierarchy 0 is write-protected.
    if (!space->isMutable && space->hierarchy == 0)
    {
        if (space->isCode)
            osCodeAlloc.DisableWriteForCode(space->bottom, space->shadowSpace, (char*)space->top - (char*)space->bottom);
        else osHeapAlloc.EnableWrite(false, space->bottom, (char*)space->top - (char*)space->bottom);
    }
    return true;
}
// Delete a local space and remove it from the table.
void MemMgr::DeleteLocalSpace(std::vector::iterator &iter)
{
LocalMemSpace *sp = *iter;
if (debugOptions & DEBUG_MEMMGR)
Log("MMGR: Deleted local %s space %p at %p size %zu\n", sp->spaceTypeString(), sp, sp->bottom, sp->spaceSize());
currentHeapSize -= sp->spaceSize();
globalStats.setSize(PSS_TOTAL_HEAP, currentHeapSize * sizeof(PolyWord));
if (sp->allocationSpace) currentAllocSpace -= sp->spaceSize();
RemoveTree(sp);
delete(sp);
iter = lSpaces.erase(iter);
}
// Remove local areas that are now empty after a GC.
// It isn't clear if we always want to do this.
void MemMgr::RemoveEmptyLocals()
{
for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); )
{
LocalMemSpace *space = *i;
if (space->isEmpty())
DeleteLocalSpace(i);
else i++;
}
}
// Create and initialise a new export space and add it to the table.
// Create and initialise a new export space and add it to the table.
// Export spaces receive data copied out by the exporter/saved-state code.
// Fix: this block contained an unresolved diff pair for the allocator
// selection line; resolved to the post-image with the base-class casts.
PermanentMemSpace* MemMgr::NewExportSpace(uintptr_t size, bool mut, bool noOv, bool code)
{
    try {
        // Code gets the executable allocator; everything else the data allocator.
        OSMem *alloc = code ? (OSMem*)&osCodeAlloc : (OSMem*)&osHeapAlloc;
        PermanentMemSpace *space = new PermanentMemSpace(alloc);
        space->spaceType = ST_EXPORT;
        space->isMutable = mut;
        space->noOverwrite = noOv;
        space->isCode = code;
        space->index = nextIndex++;
        // Allocate the memory itself.
        size_t iSpace = size*sizeof(PolyWord);
        if (code)
        {
            void* shadow;
            space->bottom = (PolyWord*)alloc->AllocateCodeArea(iSpace, shadow);
            if (space->bottom != 0)
                space->shadowSpace = (PolyWord*)shadow;
        }
        else space->bottom = (PolyWord*)alloc->AllocateDataArea(iSpace);
        if (space->bottom == 0)
        {
            delete space;
            if (debugOptions & DEBUG_MEMMGR)
                Log("MMGR: New export %smutable space: insufficient space\n", mut ? "" : "im");
            return 0;
        }
        // The size may have been rounded up to a block boundary.
        size = iSpace/sizeof(PolyWord);
        space->top = space->bottom + size;
        space->topPointer = space->bottom;
#ifdef POLYML32IN64
        // The address must be on an odd-word boundary so that after the length
        // word is put in the actual cell address is on an even-word boundary.
        space->writeAble(space->topPointer)[0] = PolyWord::FromUnsigned(0);
        space->topPointer = space->bottom + 1;
#endif
        if (debugOptions & DEBUG_MEMMGR)
            Log("MMGR: New export %smutable %s%sspace %p, size=%luk words, bottom=%p, top=%p\n", mut ? "" : "im",
                noOv ? "no-overwrite " : "", code ? "code " : "", space,
                space->spaceSize() / 1024, space->bottom, space->top);
        // Add to the table.
        try {
            AddTree(space);
            eSpaces.push_back(space);
        }
        catch (std::exception&) {
            RemoveTree(space);
            delete space;
            if (debugOptions & DEBUG_MEMMGR)
                Log("MMGR: New export %smutable space: Adding to tree failed\n", mut ? "" : "im");
            return 0;
        }
        return space;
    }
    catch (std::bad_alloc&) {
        if (debugOptions & DEBUG_MEMMGR)
            Log("MMGR: New export %smutable space: \"new\" failed\n", mut ? "" : "im");
        return 0;
    }
}
void MemMgr::DeleteExportSpaces(void)
{
for (std::vector::iterator i = eSpaces.begin(); i < eSpaces.end(); i++)
{
PermanentMemSpace *space = *i;
RemoveTree(space);
delete(space);
}
eSpaces.clear();
}
// If we have saved the state rather than exported a function we turn the exported
// spaces into permanent ones, removing existing permanent spaces at the same or
// lower level.
bool MemMgr::PromoteExportSpaces(unsigned hierarchy)
{
// Save permanent spaces at a lower hierarchy. Others are converted into
// local spaces. Most or all items will have been copied from these spaces
// into an export space but there could be items reachable only from the stack.
std::vector::iterator i = pSpaces.begin();
while (i != pSpaces.end())
{
PermanentMemSpace *pSpace = *i;
if (pSpace->hierarchy < hierarchy)
i++;
else
{
try {
// Turn this into a local space or a code space
// Remove this from the tree - AddLocalSpace will make an entry for the local version.
RemoveTree(pSpace);
if (pSpace->isCode)
{
// Enable write access. Permanent spaces are read-only.
// osCodeAlloc.SetPermissions(pSpace->bottom, (char*)pSpace->top - (char*)pSpace->bottom,
// PERMISSION_READ | PERMISSION_WRITE | PERMISSION_EXEC);
CodeSpace *space = new CodeSpace(pSpace->bottom, pSpace->shadowSpace, pSpace->spaceSize(), &osCodeAlloc);
if (! space->headerMap.Create(space->spaceSize()))
{
if (debugOptions & DEBUG_MEMMGR)
Log("MMGR: Unable to create header map for state space %p\n", pSpace);
return false;
}
if (!AddCodeSpace(space))
{
if (debugOptions & DEBUG_MEMMGR)
Log("MMGR: Unable to convert saved state space %p into code space\n", pSpace);
return false;
}
if (debugOptions & DEBUG_MEMMGR)
Log("MMGR: Converted saved state space %p into code space %p\n", pSpace, space);
// Set the bits in the header map.
for (PolyWord *ptr = space->bottom; ptr < space->top; )
{
PolyObject *obj = (PolyObject*)(ptr+1);
// We may have forwarded this if this has been
// copied to the exported area. Restore the original length word.
if (obj->ContainsForwardingPtr())
{
#ifdef POLYML32IN64
PolyObject *forwardedTo = obj;
// This is relative to globalCodeBase not globalHeapBase
while (forwardedTo->ContainsForwardingPtr())
forwardedTo = (PolyObject*)(globalCodeBase + ((forwardedTo->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
#else
PolyObject *forwardedTo = obj->FollowForwardingChain();
#endif
obj->SetLengthWord(forwardedTo->LengthWord());
}
// Set the "start" bit if this is allocated. It will be a byte seg if not.
if (obj->IsCodeObject())
space->headerMap.SetBit(ptr-space->bottom);
ASSERT(!obj->IsClosureObject());
ptr += obj->Length() + 1;
}
}
else
{
// Enable write access. Permanent spaces are read-only.
// osHeapAlloc.SetPermissions(pSpace->bottom, (char*)pSpace->top - (char*)pSpace->bottom,
// PERMISSION_READ | PERMISSION_WRITE);
LocalMemSpace *space = new LocalMemSpace(&osHeapAlloc);
space->top = pSpace->top;
// Space is allocated in local areas from the top down. This area is full and
// all data is in the old generation. The area can be recovered by a full GC.
space->bottom = space->upperAllocPtr = space->lowerAllocPtr =
space->fullGCLowerLimit = pSpace->bottom;
space->isMutable = pSpace->isMutable;
space->isCode = false;
if (! space->bitmap.Create(space->top-space->bottom) || ! AddLocalSpace(space))
{
if (debugOptions & DEBUG_MEMMGR)
Log("MMGR: Unable to convert saved state space %p into local space\n", pSpace);
return false;
}
if (debugOptions & DEBUG_MEMMGR)
Log("MMGR: Converted saved state space %p into local %smutable space %p\n",
pSpace, pSpace->isMutable ? "im": "", space);
currentHeapSize += space->spaceSize();
globalStats.setSize(PSS_TOTAL_HEAP, currentHeapSize * sizeof(PolyWord));
}
i = pSpaces.erase(i);
}
catch (std::bad_alloc&) {
return false;
}
}
}
// Save newly exported spaces.
for(std::vector::iterator j = eSpaces.begin(); j < eSpaces.end(); j++)
{
PermanentMemSpace *space = *j;
space->hierarchy = hierarchy; // Set the hierarchy of the new spaces.
space->spaceType = ST_PERMANENT;
// Put a dummy object to fill up the unused space.
if (space->topPointer != space->top)
FillUnusedSpace(space->writeAble(space->topPointer), space->top - space->topPointer);
// Put in a dummy object to fill the rest of the space.
pSpaces.push_back(space);
}
eSpaces.clear();
return true;
}
// Before we import a hierarchical saved state we need to turn any previously imported
// spaces into local spaces.
// Before importing a hierarchical saved state, turn previously imported
// (non-permanent) spaces into local spaces.
bool MemMgr::DemoteImportSpaces()
{
    return PromoteExportSpaces(1); // Only truly permanent spaces are retained.
}
// Return the space for a given index
// Return the permanent space with the given index, or NULL if none matches.
// Fix: restored the stripped std::vector template argument (PermanentMemSpace*).
PermanentMemSpace *MemMgr::SpaceForIndex(unsigned index)
{
    for (std::vector<PermanentMemSpace*>::iterator i = pSpaces.begin(); i < pSpaces.end(); i++)
    {
        PermanentMemSpace *space = *i;
        if (space->index == index)
            return space;
    }
    return NULL;
}
// In several places we assume that segments are filled with valid
// objects. This fills unused memory with one or more "byte" objects.
// In several places we assume that segments are filled with valid
// objects. This fills unused memory with one or more "byte" objects,
// which the GC and scanners always skip.
void MemMgr::FillUnusedSpace(PolyWord *base, uintptr_t words)
{
    PolyWord *pDummy = base+1; // Cell body; base holds the length word.
    while (words > 0)
    {
#ifdef POLYML32IN64
        // Make sure that any dummy object we insert is properly aligned.
        if (((uintptr_t)pDummy) & 4)
        {
            *pDummy++ = PolyWord::FromUnsigned(0);
            words--;
            continue;
        }
#endif
        POLYUNSIGNED oSize;
        // If the space is larger than the maximum object size
        // we will need several objects.
        if (words > MAX_OBJECT_SIZE) oSize = MAX_OBJECT_SIZE;
        else oSize = (POLYUNSIGNED)(words-1); // -1 for the length word itself.
        // Make this a byte object so it's always skipped.
        ((PolyObject*)pDummy)->SetLengthWord(oSize, F_BYTE_OBJ);
        words -= oSize+1;
        pDummy += oSize+1;
    }
}
// Allocate an area of the heap of at least minWords and at most maxWords.
// This is used both when allocating single objects (when minWords and maxWords
// are the same) and when allocating heap segments. If there is insufficient
// space to satisfy the minimum it will return 0.
// Allocate an area of the heap of at least minWords and at most maxWords,
// updating maxWords with the amount actually available.  Used both for
// single objects (minWords == maxWords) and heap segments.  Returns 0 if
// even the minimum cannot be satisfied.
// Fix: restored the stripped std::vector template argument (LocalMemSpace*)
// in the round-robin loop (the iterator is used purely as a loop counter).
PolyWord *MemMgr::AllocHeapSpace(uintptr_t minWords, uintptr_t &maxWords, bool doAllocation)
{
    PLocker locker(&allocLock);
    // We try to distribute the allocations between the memory spaces
    // so that at the next GC we don't have all the most recent cells in
    // one space. The most recent cells will be more likely to survive a
    // GC so distributing them improves the load balance for a multi-thread GC.
    nextAllocator++;
    if (nextAllocator > gMem.lSpaces.size()) nextAllocator = 0;
    unsigned j = nextAllocator;
    for (std::vector<LocalMemSpace*>::iterator i = lSpaces.begin(); i < lSpaces.end(); i++)
    {
        if (j >= gMem.lSpaces.size()) j = 0;
        LocalMemSpace *space = gMem.lSpaces[j++];
        if (space->allocationSpace)
        {
            uintptr_t available = space->freeSpace();
            if (available > 0 && available >= minWords)
            {
                // Reduce the maximum value if we had less than that.
                if (available < maxWords) maxWords = available;
#ifdef POLYML32IN64
                // If necessary round down to an even boundary
                if (maxWords & 1)
                {
                    maxWords--;
                    space->lowerAllocPtr[maxWords] = PolyWord::FromUnsigned(0);
                }
#endif
                PolyWord *result = space->lowerAllocPtr; // Return the address.
                if (doAllocation)
                    space->lowerAllocPtr += maxWords; // Allocate it.
#ifdef POLYML32IN64
                ASSERT((uintptr_t)result & 4); // Must be odd-word aligned
#endif
                return result;
            }
        }
    }
    // There isn't space in the existing areas - can we create a new area?
    // The reason we don't have enough space could simply be that we want to
    // allocate an object larger than the default space size. Try deleting
    // some other spaces to bring currentAllocSpace below spaceBeforeMinorGC - minWords.
    if (minWords > defaultSpaceSize && minWords < spaceBeforeMinorGC)
        RemoveExcessAllocation(spaceBeforeMinorGC - minWords);
    if (currentAllocSpace/* + minWords */ < spaceBeforeMinorGC)
    {
        // i.e. the current allocation space is less than the space allowed for the minor GC
        // but it may be that allocating this object will take us over the limit. We allow
        // that to happen so that we can successfully allocate very large objects even if
        // we have a new GC very shortly.
        uintptr_t spaceSize = defaultSpaceSize;
#ifdef POLYML32IN64
        // When we create the allocation space we take one word so that the first
        // length word is on an odd-word boundary. We need to allow for that otherwise
        // we may have available < minWords.
        if (minWords >= spaceSize) spaceSize = minWords+1; // If we really want a large space.
#else
        if (minWords > spaceSize) spaceSize = minWords; // If we really want a large space.
#endif
        LocalMemSpace *space = CreateAllocationSpace(spaceSize);
        if (space == 0) return 0; // Can't allocate it
        // Allocate our space in this new area.
        uintptr_t available = space->freeSpace();
        ASSERT(available >= minWords);
        if (available < maxWords)
        {
            maxWords = available;
#ifdef POLYML32IN64
            // If necessary round down to an even boundary
            if (maxWords & 1)
            {
                maxWords--;
                space->lowerAllocPtr[maxWords] = PolyWord::FromUnsigned(0);
            }
#endif
        }
        PolyWord *result = space->lowerAllocPtr; // Return the address.
        if (doAllocation)
            space->lowerAllocPtr += maxWords; // Allocate it.
#ifdef POLYML32IN64
        ASSERT((uintptr_t)result & 4); // Must be odd-word aligned
#endif
        return result;
    }
    return 0; // There isn't space even for the minimum.
}
// A code space: mutable + code, with a simple first-fit free list
// tracked by firstFree/largestFree.
CodeSpace::CodeSpace(PolyWord *start, PolyWord *shadow, uintptr_t spaceSize, OSMem *alloc): MarkableSpace(alloc)
{
    bottom = start;
    shadowSpace = shadow;
    top = start+spaceSize;
    isMutable = true; // Make it mutable just in case. This will cause it to be scanned.
    isCode = true;
    spaceType = ST_CODE;
#ifdef POLYML32IN64
    // Dummy word so that the cell itself, after the length word, is on an 8-byte boundary.
    writeAble(start)[0] = PolyWord::FromUnsigned(0);
    largestFree = spaceSize - 2;
    firstFree = start+1;
#else
    largestFree = spaceSize - 1;
    firstFree = start;
#endif
}
// Create a new mutable, executable code space of at least "size" words
// and add it to the code-space table.  Returns 0 on failure.
// Fix: FillUnusedSpace was called unconditionally after the error-handling
// chain, so on the headerMap.Create / AddCodeSpace failure paths (where
// allocSpace has been deleted and set to 0) it dereferenced a null pointer.
// It is now only performed when the space was successfully set up.
CodeSpace *MemMgr::NewCodeSpace(uintptr_t size)
{
    // Allocate a new area and add it at the end of the table.
    CodeSpace *allocSpace = 0;
    // Allocate a new mutable, code space. N.B. This may round up "actualSize".
    size_t actualSize = size * sizeof(PolyWord);
    void* shadow;
    PolyWord *mem =
        (PolyWord*)osCodeAlloc.AllocateCodeArea(actualSize, shadow);
    if (mem != 0)
    {
        try {
            allocSpace = new CodeSpace(mem, (PolyWord*)shadow, actualSize / sizeof(PolyWord), &osCodeAlloc);
            allocSpace->shadowSpace = (PolyWord*)shadow;
            if (!allocSpace->headerMap.Create(allocSpace->spaceSize()))
            {
                delete allocSpace;
                allocSpace = 0;
            }
            else if (!AddCodeSpace(allocSpace))
            {
                delete allocSpace;
                allocSpace = 0;
            }
            else
            {
                if (debugOptions & DEBUG_MEMMGR)
                    Log("MMGR: New code space %p allocated at %p size %lu\n", allocSpace, allocSpace->bottom, allocSpace->spaceSize());
                // Put in a byte cell to mark the area as unallocated.
                FillUnusedSpace(allocSpace->writeAble(allocSpace->firstFree), allocSpace->top- allocSpace->firstFree);
            }
        }
        catch (std::bad_alloc&)
        {
        }
        if (allocSpace == 0)
        {
            osCodeAlloc.FreeCodeArea(mem, shadow, actualSize);
            mem = 0;
        }
    }
    return allocSpace;
}
// Allocate memory for a piece of code. This needs to be both mutable and executable,
// at least for native code. The interpreted version need not (should not?) make the
// area executable. It will not be executed until the mutable bit has been cleared.
// Once code is allocated it is not GCed or moved.
// initCell is a byte cell that is copied into the new code area.
// Allocate a code cell of requiredSize words (first-fit over the code
// spaces; free cells are byte objects).  Creates a new code space when no
// existing one has room.  Returns 0 only if a new space cannot be created.
PolyObject* MemMgr::AllocCodeSpace(POLYUNSIGNED requiredSize)
{
    PLocker locker(&codeSpaceLock);
    // Search the code spaces until we find a free area big enough.
    size_t i = 0;
    while (true)
    {
        if (i != cSpaces.size())
        {
            CodeSpace *space = cSpaces[i];
            // largestFree is a cached upper bound; only walk the space if it might fit.
            if (space->largestFree >= requiredSize)
            {
                POLYUNSIGNED actualLargest = 0;
                // Advance firstFree past leading cells that can never satisfy a request.
                while (space->firstFree < space->top)
                {
                    PolyObject *obj = (PolyObject*)(space->firstFree+1);
                    // Skip over allocated areas or free areas that are too small.
                    if (obj->IsCodeObject() || obj->Length() < 8)
                        space->firstFree += obj->Length()+1;
                    else break;
                }
                PolyWord *pt = space->firstFree;
                while (pt < space->top)
                {
                    PolyObject *obj = (PolyObject*)(pt+1);
                    POLYUNSIGNED length = obj->Length();
                    // Free cells are byte objects (see FillUnusedSpace).
                    if (obj->IsByteObject())
                    {
                        if (length >= requiredSize)
                        {
                            // Free and large enough
                            PolyWord *next = pt+requiredSize+1;
                            POLYUNSIGNED spare = length - requiredSize;
#ifdef POLYML32IN64
                            // Maintain alignment.
                            if (((requiredSize + 1) & 1) && spare != 0)
                            {
                                space->writeAble(next++)[0] = PolyWord::FromUnsigned(0);
                                spare--;
                            }
#endif
                            if (spare != 0)
                                FillUnusedSpace(space->writeAble(next), spare);
                            space->isMutable = true; // Set this - it ensures the area is scanned on GC.
                            space->headerMap.SetBit(pt-space->bottom); // Set the "header" bit
                            // Set the length word of the code area and copy the byte cell in.
                            // The code bit must be set before the lock is released to ensure
                            // another thread doesn't reuse this.
                            space->writeAble(obj)->SetLengthWord(requiredSize, F_CODE_OBJ|F_MUTABLE_BIT);
                            return obj;
                        }
                        else if (length >= actualLargest) actualLargest = length+1;
                    }
                    pt += length+1;
                }
                // Reached the end without finding what we wanted. Update the largest size.
                space->largestFree = actualLargest;
            }
            i++; // Next area
        }
        else
        {
            // Allocate a new area and add it at the end of the table.
            uintptr_t spaceSize = requiredSize + 1;
#ifdef POLYML32IN64
            // We need to allow for the extra alignment word otherwise we
            // may allocate less than we need.
            spaceSize += 1;
#endif
            CodeSpace *allocSpace = NewCodeSpace(spaceSize);
            if (allocSpace == 0)
                return 0; // Try a GC.
            globalStats.incSize(PSS_CODE_SPACE, allocSpace->spaceSize() * sizeof(PolyWord));
            // Loop round: the new space is now the last entry and will be searched.
        }
    }
}
// Remove code areas that are completely empty. This is probably better than waiting to reuse them.
// It's particularly important if we reload a saved state because the code areas for old saved states
// are made into local code areas just in case they are currently in use or reachable.
void MemMgr::RemoveEmptyCodeAreas()
{
for (std::vector::iterator i = cSpaces.begin(); i != cSpaces.end(); )
{
CodeSpace *space = *i;
PolyObject *start = (PolyObject *)(space->bottom+1);
if (start->IsByteObject() && start->Length() == space->spaceSize()-1)
{
if (debugOptions & DEBUG_MEMMGR)
Log("MMGR: Deleted code space %p at %p size %zu\n", space, space->bottom, space->spaceSize());
globalStats.decSize(PSS_CODE_SPACE, space->spaceSize() * sizeof(PolyWord));
// We have an empty cell that fills the whole space.
RemoveTree(space);
delete(space);
i = cSpaces.erase(i);
}
else i++;
}
}
// Add a code space to the tables. Used both for newly compiled code and also demoted saved spaces.
// Register a code space in the address tree and the code-space table.
// Returns false (after undoing any partial tree insertion) if either
// step runs out of memory; the space itself is not freed here.
bool MemMgr::AddCodeSpace(CodeSpace *space)
{
    try {
        AddTree(space);
        cSpaces.push_back(space);
        return true;
    }
    catch (std::exception&) {
        // AddTree may have partially succeeded; remove what it installed.
        RemoveTree(space);
        return false;
    }
}
// Check that we have sufficient space for an allocation to succeed.
// Called from the GC to ensure that we will not get into an infinite
// loop trying to allocate, failing and garbage-collecting again.
// Check that an allocation of "words" would succeed without actually
// performing it (doAllocation=false). The GC uses this to avoid an
// infinite allocate/fail/collect loop.
bool MemMgr::CheckForAllocation(uintptr_t words)
{
    uintptr_t wouldAllocate = 0;
    return AllocHeapSpace(words, wouldAllocate, false) != 0;
}
// Adjust the allocation area by removing free areas so that the total
// size of the allocation area is less than the required value. This
// is used after the quick GC and also if we need to allocate a large
// object.
void MemMgr::RemoveExcessAllocation(uintptr_t words)
{
// First remove any non-standard allocation areas.
for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end();)
{
LocalMemSpace *space = *i;
if (space->allocationSpace && space->isEmpty() &&
space->spaceSize() != defaultSpaceSize)
DeleteLocalSpace(i);
else i++;
}
for (std::vector::iterator i = lSpaces.begin(); currentAllocSpace > words && i < lSpaces.end(); )
{
LocalMemSpace *space = *i;
if (space->allocationSpace && space->isEmpty())
DeleteLocalSpace(i);
else i++;
}
}
// Return number of words free in all allocation spaces.
uintptr_t MemMgr::GetFreeAllocSpace()
{
uintptr_t freeSpace = 0;
PLocker lock(&allocLock);
for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++)
{
LocalMemSpace *space = *i;
if (space->allocationSpace)
freeSpace += space->freeSpace();
}
return freeSpace;
}
// Allocate a stack space of at least "size" words for a new thread.
// Returns 0 on failure; the caller must initialise the stack contents.
StackSpace *MemMgr::NewStackSpace(uintptr_t size)
{
    PLocker lock(&stackSpaceLock);
    try {
        StackSpace *space = new StackSpace(&osStackAlloc);
        size_t iSpace = size*sizeof(PolyWord); // Byte size requested from the OS allocator.
        space->bottom = (PolyWord*)osStackAlloc.AllocateDataArea(iSpace);
        if (space->bottom == 0)
        {
            // OS allocation failed: release the descriptor and report failure.
            if (debugOptions & DEBUG_MEMMGR)
                Log("MMGR: New stack space: insufficient space\n");
            delete space;
            return 0;
        }
        // The size may have been rounded up to a block boundary.
        size = iSpace/sizeof(PolyWord);
        space->top = space->bottom + size;
        space->spaceType = ST_STACK;
        space->isMutable = true;
        // Add the stack space to the tree. This ensures that operations such as
        // LocalSpaceForAddress will work for addresses within the stack. We can
        // get them in the RTS with functions such as quot_rem and exception stack.
        // It's not clear whether they really appear in the GC.
        try {
            AddTree(space);
            sSpaces.push_back(space);
        }
        catch (std::exception&) {
            // AddTree or push_back failed: undo any partial tree insertion
            // and free the space (its destructor releases the data area).
            RemoveTree(space);
            delete space;
            return 0;
        }
        if (debugOptions & DEBUG_MEMMGR)
            Log("MMGR: New stack space %p allocated at %p size %lu\n", space, space->bottom, space->spaceSize());
        globalStats.incSize(PSS_STACK_SPACE, space->spaceSize() * sizeof(PolyWord));
        return space;
    }
    catch (std::bad_alloc&) {
        // "new StackSpace" itself failed.
        if (debugOptions & DEBUG_MEMMGR)
            Log("MMGR: New stack space: \"new\" failed\n");
        return 0;
    }
}
// If checkmem is given write protect the immutable areas except during a GC.
void MemMgr::ProtectImmutable(bool on)
{
if (debugOptions & DEBUG_CHECK_OBJECTS)
{
for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++)
{
LocalMemSpace *space = *i;
if (!space->isMutable)
{
if (!space->isCode)
osHeapAlloc.EnableWrite(!on, space->bottom, (char*)space->top - (char*)space->bottom);
}
}
}
}
// Change the size of a thread's stack, copying the live frame into the new
// area. Returns true on success. On failure the existing stack is left
// completely untouched so the thread can continue on it.
bool MemMgr::GrowOrShrinkStack(TaskData *taskData, uintptr_t newSize)
{
    StackSpace *space = taskData->stack;
    size_t iSpace = newSize*sizeof(PolyWord);
    PolyWord *newSpace = (PolyWord*)osStackAlloc.AllocateDataArea(iSpace);
    if (newSpace == 0)
    {
        if (debugOptions & DEBUG_MEMMGR)
            Log("MMGR: Unable to change size of stack %p from %lu to %lu: insufficient space\n",
                space, space->spaceSize(), newSize);
        return false;
    }
    // The size may have been rounded up to a block boundary.
    newSize = iSpace/sizeof(PolyWord);
    try {
        AddTree(space, newSpace, newSpace+newSize);
    }
    catch (std::bad_alloc&) {
        // Tree insertion failed. Remove any partially installed entries for
        // the NEW range and release the new area, leaving the current stack
        // intact. (The previous code did "delete space; return 0;" here,
        // which destroyed the thread's live stack, leaked newSpace and
        // returned 0 from a bool function.)
        RemoveTree(space, newSpace, newSpace+newSize);
        osStackAlloc.FreeDataArea(newSpace, iSpace);
        return false;
    }
    taskData->CopyStackFrame(space->stack(), space->spaceSize(), (StackObject*)newSpace, newSize);
    if (debugOptions & DEBUG_MEMMGR)
        Log("MMGR: Size of stack %p changed from %lu to %lu at %p\n", space, space->spaceSize(), newSize, newSpace);
    globalStats.incSize(PSS_STACK_SPACE, (newSize - space->spaceSize()) * sizeof(PolyWord));
    RemoveTree(space); // Remove it BEFORE freeing the space - another thread may allocate it
    PolyWord *oldBottom = space->bottom;
    size_t oldSize = (char*)space->top - (char*)space->bottom;
    space->bottom = newSpace; // Switch this before freeing - We could get a profile trap during the free
    space->top = newSpace+newSize;
    osStackAlloc.FreeDataArea(oldBottom, oldSize);
    return true;
}
// Delete a stack when a thread has finished.
// This can be called by an ML thread so needs an interlock.
// Delete a stack when a thread has finished.
// This can be called by an ML thread so needs an interlock.
// Fixes: (1) the iterator declaration had its element type stripped
// ("std::vector::iterator" is not valid C++); (2) the debug Log read
// space->bottom and space->spaceSize() AFTER "delete space" - a
// use-after-free. The log line now runs before the space is destroyed.
bool MemMgr::DeleteStackSpace(StackSpace *space)
{
    PLocker lock(&stackSpaceLock);
    for (std::vector<StackSpace *>::iterator i = sSpaces.begin(); i < sSpaces.end(); i++)
    {
        if (*i == space)
        {
            globalStats.decSize(PSS_STACK_SPACE, space->spaceSize() * sizeof(PolyWord));
            // Log while the space is still valid.
            if (debugOptions & DEBUG_MEMMGR)
                Log("MMGR: Deleted stack space %p at %p size %zu\n", space, space->bottom, space->spaceSize());
            RemoveTree(space);
            delete space;
            sSpaces.erase(i);
            return true;
        }
    }
    ASSERT(false); // It should always be in the table.
    return false;
}
// An interior node: not itself a space, and all 256 slots start empty.
SpaceTreeTree::SpaceTreeTree(): SpaceTree(false)
{
    for (unsigned slot = 0; slot < 256; slot++)
        tree[slot] = 0;
}
// Recursively delete interior sub-nodes only. Leaf entries are MemSpaces
// whose lifetime is managed elsewhere (by MemMgr), so they are not freed.
SpaceTreeTree::~SpaceTreeTree()
{
    for (unsigned slot = 0; slot < 256; slot++)
    {
        SpaceTree *sub = tree[slot];
        if (sub != 0 && !sub->isSpace)
            delete(sub);
    }
}
// Add and remove entries in the space tree.
// Enter "space" in the address tree for the range [startS, endS).
// May propagate std::bad_alloc from AddTreeRange; callers recover
// with RemoveTree over the same range.
void MemMgr::AddTree(MemSpace *space, PolyWord *startS, PolyWord *endS)
{
    // It isn't clear we need to lock here but it's probably sensible.
    PLocker lock(&spaceTreeLock);
    AddTreeRange(&spaceTree, space, (uintptr_t)startS, (uintptr_t)endS);
}
// Remove "space" from the address tree for the range [startS, endS).
// Also used to undo a partially completed AddTree.
void MemMgr::RemoveTree(MemSpace *space, PolyWord *startS, PolyWord *endS)
{
    PLocker lock(&spaceTreeLock);
    RemoveTreeRange(&spaceTree, space, (uintptr_t)startS, (uintptr_t)endS);
}
// Recursive worker for AddTree. Each level of the 256-way tree decodes the
// current high-order byte of the (left-shifted) address. Slots fully covered
// by [startS, endS) point directly at the space; a partially covered slot at
// either end recurses one level down with the addresses shifted left 8 bits.
// At any level endS == 0 means "to the end of this level's range".
// "new SpaceTreeTree" may throw std::bad_alloc; the caller then calls
// RemoveTreeRange over the same range to undo the partial insertion.
void MemMgr::AddTreeRange(SpaceTree **tt, MemSpace *space, uintptr_t startS, uintptr_t endS)
{
    if (*tt == 0) // No interior node here yet - create one.
        *tt = new SpaceTreeTree;
    ASSERT(! (*tt)->isSpace);
    SpaceTreeTree *t = (SpaceTreeTree*)*tt;
    const unsigned shift = (sizeof(void*)-1) * 8; // Takes the high-order byte
    uintptr_t r = startS >> shift;
    ASSERT(r < 256);
    const uintptr_t s = endS == 0 ? 256 : endS >> shift;
    ASSERT(s >= r && s <= 256);
    if (r == s) // Wholly within this entry
        AddTreeRange(&(t->tree[r]), space, startS << 8, endS << 8);
    else
    {
        // Deal with any remainder at the start.
        if ((r << shift) != startS)
        {
            AddTreeRange(&(t->tree[r]), space, startS << 8, 0 /*End of range*/);
            r++;
        }
        // Whole entries.
        while (r < s)
        {
            ASSERT(t->tree[r] == 0); // Ranges must not overlap existing entries.
            t->tree[r] = space;
            r++;
        }
        // Remainder at the end.
        if ((s << shift) != endS)
            AddTreeRange(&(t->tree[r]), space, 0, endS << 8);
    }
}
// Remove an entry from the tree for a range. Strictly speaking we don't need the
// space argument here but it's useful as a check.
// This may be called to remove a partially installed structure if we have
// run out of space in AddTreeRange.
// Recursive worker for RemoveTree; mirrors AddTreeRange's traversal.
// Clears every slot in [startS, endS) and deletes any interior node that
// becomes completely empty, zeroing the parent's pointer to it.
void MemMgr::RemoveTreeRange(SpaceTree **tt, MemSpace *space, uintptr_t startS, uintptr_t endS)
{
    SpaceTreeTree *t = (SpaceTreeTree*)*tt;
    if (t == 0)
        return; // This can only occur if we're recovering.
    ASSERT(! t->isSpace);
    const unsigned shift = (sizeof(void*)-1) * 8; // High-order byte at this level.
    uintptr_t r = startS >> shift;
    const uintptr_t s = endS == 0 ? 256 : endS >> shift; // endS==0: to end of level.
    if (r == s)
        RemoveTreeRange(&(t->tree[r]), space, startS << 8, endS << 8);
    else
    {
        // Deal with any remainder at the start.
        if ((r << shift) != startS)
        {
            RemoveTreeRange(&(t->tree[r]), space, startS << 8, 0);
            r++;
        }
        // Whole entries.
        while (r < s)
        {
            // A zero entry is possible when undoing a partial AddTreeRange.
            ASSERT(t->tree[r] == space || t->tree[r] == 0 /* Recovery only */);
            t->tree[r] = 0;
            r++;
        }
        // Remainder at the end.
        if ((s << shift) != endS)
            RemoveTreeRange(&(t->tree[r]), space, 0, endS << 8);
    }
    // See if the whole vector is now empty.
    for (unsigned j = 0; j < 256; j++)
    {
        if (t->tree[j])
            return; // It's not empty - we're done.
    }
    // All 256 slots are empty: free this node and unlink it from its parent.
    delete(t);
    *tt = 0;
}
uintptr_t MemMgr::AllocatedInAlloc()
{
uintptr_t inAlloc = 0;
for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++)
{
LocalMemSpace *sp = *i;
if (sp->allocationSpace) inAlloc += sp->allocatedSpace();
}
return inAlloc;
}
// Report heap sizes and occupancy before and after GC
void MemMgr::ReportHeapSizes(const char *phase)
{
uintptr_t alloc = 0, nonAlloc = 0, inAlloc = 0, inNonAlloc = 0;
for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++)
{
LocalMemSpace *sp = *i;
if (sp->allocationSpace)
{
alloc += sp->spaceSize();
inAlloc += sp->allocatedSpace();
}
else
{
nonAlloc += sp->spaceSize();
inNonAlloc += sp->allocatedSpace();
}
}
Log("Heap: %s Major heap used ", phase);
LogSize(inNonAlloc); Log(" of ");
LogSize(nonAlloc);
Log(" (%1.0f%%). Alloc space used ", (float)inNonAlloc / (float)nonAlloc * 100.0F);
LogSize(inAlloc); Log(" of ");
LogSize(alloc);
Log(" (%1.0f%%). Total space ", (float)inAlloc / (float)alloc * 100.0F);
LogSize(spaceForHeap);
Log(" %1.0f%% full.\n", (float)(inAlloc + inNonAlloc) / (float)spaceForHeap * 100.0F);
Log("Heap: Local spaces %" PRI_SIZET ", permanent spaces %" PRI_SIZET ", code spaces %" PRI_SIZET ", stack spaces %" PRI_SIZET "\n",
lSpaces.size(), pSpaces.size(), cSpaces.size(), sSpaces.size());
uintptr_t cTotal = 0, cOccupied = 0;
for (std::vector::iterator c = cSpaces.begin(); c != cSpaces.end(); c++)
{
cTotal += (*c)->spaceSize();
PolyWord *pt = (*c)->bottom;
while (pt < (*c)->top)
{
pt++;
PolyObject *obj = (PolyObject*)pt;
if (obj->ContainsForwardingPtr())
{
#ifdef POLYML32IN64
// This is relative to globalCodeBase not globalHeapBase
while (obj->ContainsForwardingPtr())
obj = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
#else
obj = obj->FollowForwardingChain();
#endif
pt += obj->Length();
}
else
{
if (obj->IsCodeObject())
cOccupied += obj->Length() + 1;
pt += obj->Length();
}
}
}
Log("Heap: Code area: total "); LogSize(cTotal); Log(" occupied: "); LogSize(cOccupied); Log("\n");
uintptr_t stackSpace = 0;
for (std::vector::iterator s = sSpaces.begin(); s != sSpaces.end(); s++)
{
stackSpace += (*s)->spaceSize();
}
Log("Heap: Stack area: total "); LogSize(stackSpace); Log("\n");
}
// Profiling - Find a code object or return zero if not found.
// This can be called on a "user" thread.
// Given an address inside (or at) a code object, return that object, or
// zero if "addr" is not inside a live code object. Uses a per-space bitmap
// with one bit set at each object's length word to search backwards.
// Can be called concurrently from a "user" thread during profiling.
PolyObject *MemMgr::FindCodeObject(const byte *addr)
{
    MemSpace *space = SpaceForAddress(addr);
    if (space == 0) return 0;
    Bitmap *profMap = 0;
    if (! space->isCode) return 0;
    // Select the header bitmap for this kind of code-bearing space.
    if (space->spaceType == ST_CODE)
    {
        CodeSpace *cSpace = (CodeSpace*)space;
        profMap = &cSpace->headerMap;
    }
    else if (space->spaceType == ST_PERMANENT)
    {
        PermanentMemSpace *pSpace = (PermanentMemSpace*)space;
        profMap = &pSpace->profileCode;
    }
    else return 0; // Must be in code or permanent code.
    // For the permanent areas the header maps are created and initialised on demand.
    if (! profMap->Created())
    {
        PLocker lock(&codeBitmapLock);
        if (! profMap->Created()) // Second check now we've got the lock.
        {
            // Create the bitmap. If it failed just say "not in this area"
            if (! profMap->Create(space->spaceSize()))
                return 0;
            // Set the first bit before releasing the lock.
            profMap->SetBit(0);
        }
    }
    // A bit is set if it is a length word.
    while ((uintptr_t)addr & (sizeof(POLYUNSIGNED)-1)) addr--; // Make it word aligned
    PolyWord *wordAddr = (PolyWord*)addr;
    // Work back to find the first set bit before this.
    // Normally we will find one but if we're looking up a value that
    // is actually an integer it might be in a piece of code that is now free.
    uintptr_t bitOffset = profMap->FindLastSet(wordAddr - space->bottom);
    if (space->spaceType == ST_CODE)
    {
        PolyWord *ptr = space->bottom+bitOffset;
        if (ptr >= space->top) return 0;
        // This will find the last non-free code cell or the first cell.
        // Return zero if the value was not actually in the cell or it wasn't code.
        PolyObject *obj = (PolyObject*)(ptr+1);
#ifdef POLYML32IN64
        PolyObject *lastObj = obj;
        // This is relative to globalCodeBase not globalHeapBase.
        while (lastObj->ContainsForwardingPtr())
            lastObj = (PolyObject*)(globalCodeBase + ((lastObj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
#else
        PolyObject *lastObj = obj->FollowForwardingChain();
#endif
        // We normally replace forwarding pointers but when scanning to update
        // addresses after a saved state we may not have yet done that.
        if (wordAddr > ptr && wordAddr < ptr + 1 + lastObj->Length() && lastObj->IsCodeObject())
            return obj;
        else return 0;
    }
    // Permanent area - the bits are set on demand.
    // Now work forward, setting any bits if necessary. We don't need a lock
    // because this is monotonic.
    for (;;)
    {
        PolyWord *ptr = space->bottom+bitOffset;
        if (ptr >= space->top) return 0; // Walked past the end: not found.
        PolyObject *obj = (PolyObject*)(ptr+1);
        ASSERT(obj->ContainsNormalLengthWord());
        if (wordAddr > ptr && wordAddr < ptr + obj->Length())
            return obj;
        // Advance to the next cell, recording its length-word position.
        bitOffset += obj->Length()+1;
        profMap->SetBit(bitOffset);
    }
    return 0; // Unreachable: the loop above always returns.
}
// Remove profiling bitmaps from permanent areas to free up memory.
void MemMgr::RemoveProfilingBitmaps()
{
for (std::vector::iterator i = pSpaces.begin(); i < pSpaces.end(); i++)
(*i)->profileCode.Destroy();
}
#ifdef POLYML32IN64DEBUG
// Debug build of the 32-in-64 conversion: turn an absolute address into a
// compressed object pointer (an even word offset from globalHeapBase).
POLYOBJECTPTR PolyWord::AddressToObjectPtr(void *address)
{
    ASSERT(address >= globalHeapBase);
    uintptr_t wordOffset = (PolyWord*)address - globalHeapBase;
    ASSERT(wordOffset <= 0x7fffffff); // Currently limited to 8Gbytes
    ASSERT((wordOffset & 1) == 0);    // Object pointers are always even.
    return (POLYOBJECTPTR)wordOffset;
}
#endif
MemMgr gMem; // The one and only memory manager object
diff --git a/libpolyml/memmgr.h b/libpolyml/memmgr.h
index 9b1bdb3a..de722e4c 100644
--- a/libpolyml/memmgr.h
+++ b/libpolyml/memmgr.h
@@ -1,440 +1,453 @@
/*
Title: memmgr.h Memory segment manager
- Copyright (c) 2006-8, 2010-12, 2016-18, 2020 David C. J. Matthews
+ Copyright (c) 2006-8, 2010-12, 2016-18, 2020, 2021 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef MEMMGR_H
#define MEMMGR_H
#include "bitmap.h"
#include "locking.h"
#include "osmem.h"
#include
// utility conversion macros
#define Words_to_K(w) (w*sizeof(PolyWord))/1024
#define Words_to_M(w) (w*sizeof(PolyWord))/(1<<20)
#define B_to_M(b) (b/(1<<20))
class ScanAddress;
class GCTaskId;
class TaskData;
typedef enum {
ST_PERMANENT, // Permanent areas are part of the object code
// Also loaded saved state.
ST_LOCAL, // Local heaps contain volatile data
ST_EXPORT, // Temporary export area
ST_STACK, // ML Stack for a thread
ST_CODE // Code created in the current run
} SpaceType;
// B-tree used in SpaceForAddress. Leaves are MemSpaces.
class SpaceTree
{
public:
    SpaceTree(bool is): isSpace(is) { }
    virtual ~SpaceTree() {}
    // True when this node is actually a MemSpace leaf rather than an
    // interior SpaceTreeTree node; tested during tree traversal.
    bool isSpace;
};
// A non-leaf node in the B-tree
class SpaceTreeTree: public SpaceTree
{
public:
SpaceTreeTree();
virtual ~SpaceTreeTree();
SpaceTree *tree[256];
};
// Base class for the various memory spaces.
class MemSpace: public SpaceTree
{
protected:
MemSpace(OSMem *alloc);
virtual ~MemSpace();
public:
SpaceType spaceType;
bool isMutable;
bool isCode;
PolyWord *bottom; // Bottom of area
PolyWord *top; // Top of area.
OSMem *allocator; // Used to free the area. May be null.
PolyWord *shadowSpace; // Extra writable area for code if necessary
uintptr_t spaceSize(void)const { return top-bottom; } // No of words
// These next two are used in the GC to limit scanning for
// weak refs.
PolyWord *lowestWeak, *highestWeak;
// Used when printing debugging info
virtual const char *spaceTypeString() { return isMutable ? "mutable" : "immutable"; }
// Return the writeable address if this is in read-only code.
byte* writeAble(byte* p) {
if (shadowSpace != 0)
return (p - (byte*)bottom + (byte*)shadowSpace);
else return p;
}
PolyWord* writeAble(PolyWord* p) {
if (shadowSpace != 0)
return (p - bottom + shadowSpace);
else return p;
}
PolyObject* writeAble(PolyObject* p) {
return (PolyObject*)writeAble((PolyWord *) p);
}
friend class MemMgr;
};
// Permanent memory space. Either linked into the executable program or
// loaded from a saved state file.
class PermanentMemSpace: public MemSpace
{
protected:
PermanentMemSpace(OSMem *alloc): MemSpace(alloc), index(0), hierarchy(0), noOverwrite(false),
- byteOnly(false), topPointer(0) {}
+ byteOnly(false), constArea(false), topPointer(0) {}
public:
unsigned index; // An identifier for the space. Used when saving and loading.
unsigned hierarchy; // The hierarchy number: 0=from executable, 1=top level saved state, ...
bool noOverwrite; // Don't save this in deeper hierarchies.
bool byteOnly; // Only contains byte data - no need to scan for addresses.
+ bool constArea; // Contains constants rather than code. Special case for exporting PIE.
// When exporting or saving state we copy data into a new area.
// This area grows upwards unlike the local areas that grow down.
PolyWord *topPointer;
Bitmap shareBitmap; // Used in sharedata
Bitmap profileCode; // Used when profiling
friend class MemMgr;
};
#define NSTARTS 10
// Markable spaces are used as the base class for local heap
// spaces and code spaces.
class MarkableSpace: public MemSpace
{
protected:
MarkableSpace(OSMem *alloc);
virtual ~MarkableSpace() {}
public:
PolyWord *fullGCRescanStart; // Upper and lower limits for rescan during mark phase.
PolyWord *fullGCRescanEnd;
PLock spaceLock; // Lock used to protect forwarding pointers
};
// Local areas can be garbage collected.
class LocalMemSpace: public MarkableSpace
{
protected:
LocalMemSpace(OSMem *alloc);
virtual ~LocalMemSpace() {}
bool InitSpace(PolyWord *heapPtr, uintptr_t size, bool mut);
public:
// Allocation. The minor GC allocates at the bottom of the areas while the
// major GC and initial allocations are made at the top. The reason for this
// is that it's only possible to scan objects from the bottom up and the minor
// GC combines scanning with allocation whereas the major GC compacts from the
// bottom into the top of an area.
PolyWord *upperAllocPtr; // Allocation pointer. Objects are allocated AFTER this.
PolyWord *lowerAllocPtr; // Allocation pointer. Objects are allocated BEFORE this.
PolyWord *fullGCLowerLimit;// Lowest object in area before copying.
PolyWord *partialGCTop; // Value of upperAllocPtr before the current partial GC.
PolyWord *partialGCScan; // Scan pointer used in minor GC
PolyWord *partialGCRootBase; // Start of the root objects.
PolyWord *partialGCRootTop;// Value of lowerAllocPtr after the roots have been copied.
GCTaskId *spaceOwner; // The thread that "owns" this space during a GC.
Bitmap bitmap; /* bitmap with one bit for each word in the GC area. */
PLock bitmapLock; // Lock used in GC sharing pass.
bool allocationSpace; // True if this is (mutable) space for initial allocation
uintptr_t start[NSTARTS]; /* starting points for bit searches. */
unsigned start_index; /* last index used to index start array */
uintptr_t i_marked; /* count of immutable words marked. */
uintptr_t m_marked; /* count of mutable words marked. */
uintptr_t updated; /* count of words updated. */
uintptr_t allocatedSpace(void)const // Words allocated
{ return (top-upperAllocPtr) + (lowerAllocPtr-bottom); }
uintptr_t freeSpace(void)const // Words free
{ return upperAllocPtr-lowerAllocPtr; }
#ifdef POLYML32IN64
// We will generally set a zero cell for alignment.
bool isEmpty(void)const { return allocatedSpace() <= 1; }
#else
bool isEmpty(void)const { return allocatedSpace() == 0; }
#endif
virtual const char *spaceTypeString()
{ return allocationSpace ? "allocation" : MemSpace::spaceTypeString(); }
// Used when converting to and from bit positions in the bitmap
uintptr_t wordNo(PolyWord *pt) { return pt - bottom; }
PolyWord *wordAddr(uintptr_t bitno) { return bottom + bitno; }
friend class MemMgr;
};
class StackObject; // Abstract - Architecture specific
// Stack spaces. These are managed by the thread module
class StackSpace: public MemSpace
{
public:
StackSpace(OSMem *alloc): MemSpace(alloc) { }
StackObject *stack()const { return (StackObject *)bottom; }
};
// Code Space. These contain local code created by the compiler.
class CodeSpace: public MarkableSpace
{
public:
    CodeSpace(PolyWord *start, PolyWord *shadow, uintptr_t spaceSize, OSMem *alloc);
    Bitmap headerMap; // Map to find the headers during GC or profiling.
    uintptr_t largestFree; // The largest free space in the area (cached upper bound).
    PolyWord *firstFree; // The start of the first free space in the area.
};
class MemMgr
{
public:
MemMgr();
~MemMgr();
bool Initialise();
// Create a local space for initial allocation.
LocalMemSpace *CreateAllocationSpace(uintptr_t size);
// Create and initialise a new local space and add it to the table.
LocalMemSpace *NewLocalSpace(uintptr_t size, bool mut);
// Create an entry for a permanent space.
PermanentMemSpace *NewPermanentSpace(PolyWord *base, uintptr_t words,
unsigned flags, unsigned index, unsigned hierarchy = 0);
// Create a permanent space but allocate memory for it.
// Sets bottom and top to the actual memory size.
PermanentMemSpace *AllocateNewPermanentSpace(uintptr_t byteSize, unsigned flags,
unsigned index, unsigned hierarchy = 0);
// Called after an allocated permanent area has been filled in.
bool CompletePermanentSpaceAllocation(PermanentMemSpace *space);
// Delete a local space. Takes the iterator position in lSpaces and returns the
// iterator after deletion.
void DeleteLocalSpace(std::vector::iterator &iter);
// Allocate an area of the heap of at least minWords and at most maxWords.
// This is used both when allocating single objects (when minWords and maxWords
// are the same) and when allocating heap segments. If there is insufficient
// space to satisfy the minimum it will return 0. Updates "maxWords" with
// the space actually allocated
PolyWord *AllocHeapSpace(uintptr_t minWords, uintptr_t &maxWords, bool doAllocation = true);
PolyWord *AllocHeapSpace(uintptr_t words)
{ uintptr_t allocated = words; return AllocHeapSpace(words, allocated); }
CodeSpace *NewCodeSpace(uintptr_t size);
// Allocate space for code. This is initially mutable to allow the code to be built.
PolyObject *AllocCodeSpace(POLYUNSIGNED size);
// Check that a subsequent allocation will succeed. Called from the GC to ensure
bool CheckForAllocation(uintptr_t words);
// If an allocation space has a lot of data left in it, particularly a single
// large object we should turn it into a local area.
void ConvertAllocationSpaceToLocal(LocalMemSpace *space);
// Allocate space for the initial stack for a thread. The caller must
// initialise the new stack. Returns 0 if allocation fails.
StackSpace *NewStackSpace(uintptr_t size);
// Adjust the space for a stack. Returns true if it succeeded. If it failed
// it leaves the stack untouched.
bool GrowOrShrinkStack(TaskData *taskData, uintptr_t newSize);
// Delete a stack when a thread has finished.
bool DeleteStackSpace(StackSpace *space);
// Create and delete export spaces
PermanentMemSpace *NewExportSpace(uintptr_t size, bool mut, bool noOv, bool code);
void DeleteExportSpaces(void);
bool PromoteExportSpaces(unsigned hierarchy); // Turn export spaces into permanent spaces.
bool DemoteImportSpaces(void); // Turn previously imported spaces into local.
PermanentMemSpace *SpaceForIndex(unsigned index); // Return the space for a given index
// As a debugging check, write protect the immutable areas apart from during the GC.
void ProtectImmutable(bool on);
// Find a space that contains a given address. This is called for every cell
// during a GC so needs to be fast.,
// N.B. This must be called on an address at the beginning or within the cell.
// Generally that means with a pointer to the length word. Pointing at the
// first "data" word may give the wrong result if the length is zero.
MemSpace *SpaceForAddress(const void *pt) const
{
uintptr_t t = (uintptr_t)pt;
SpaceTree *tr = spaceTree;
// Each level of the tree is either a leaf or a vector of trees.
unsigned j = sizeof(void *)*8;
for (;;)
{
if (tr == 0 || tr->isSpace)
return (MemSpace*)tr;
j -= 8;
tr = ((SpaceTreeTree*)tr)->tree[(t >> j) & 0xff];
}
return 0;
}
// SpaceForAddress must NOT be applied to a PolyObject *. That's because
// it works nearly all the time except when a zero-sized object is placed
// at the end of page. Then the base address will be the start of the
// next page.
void SpaceForAddress(PolyObject *pt) {}
// Use this instead.
MemSpace* SpaceForObjectAddress(PolyObject* pt)
{ return SpaceForAddress(((PolyWord*)pt) - 1); }
// Find a local address for a space.
// N.B. The argument should generally be the length word. See
// comment on SpaceForAddress.
LocalMemSpace *LocalSpaceForAddress(const void *pt) const
{
MemSpace *s = SpaceForAddress(pt);
if (s != 0 && s->spaceType == ST_LOCAL)
return (LocalMemSpace*)s;
else return 0;
}
// LocalSpaceForAddress must NOT be applied to PolyObject*.
// See comment on SpaceForAddress above.
void LocalSpaceForAddress(PolyObject* pt) {}
LocalMemSpace* LocalSpaceForObjectAddress(PolyObject* pt)
{ return LocalSpaceForAddress(((PolyWord*)pt) - 1); }
void SetReservation(uintptr_t words) { reservedSpace = words; }
// In several places we assume that segments are filled with valid
// objects. This fills unused memory with one or more "byte" objects.
void FillUnusedSpace(PolyWord *base, uintptr_t words);
// Return number of words of free space for stats.
uintptr_t GetFreeAllocSpace();
// Remove unused local areas.
void RemoveEmptyLocals();
// Remove unused code areas.
void RemoveEmptyCodeAreas();
// Remove unused allocation areas to reduce the space below the limit.
void RemoveExcessAllocation(uintptr_t words);
void RemoveExcessAllocation() { RemoveExcessAllocation(spaceBeforeMinorGC); }
// Table for permanent spaces
std::vector pSpaces;
// Table for local spaces
std::vector lSpaces;
// Table for export spaces
std::vector eSpaces;
// Table for stack spaces
std::vector sSpaces;
PLock stackSpaceLock;
// Table for code spaces
std::vector cSpaces;
PLock codeSpaceLock;
// Storage manager lock.
PLock allocLock;
// Lock for creating new bitmaps for code profiling
PLock codeBitmapLock;
unsigned nextIndex; // Used when allocating new permanent spaces.
uintptr_t SpaceBeforeMinorGC() const { return spaceBeforeMinorGC; }
uintptr_t SpaceForHeap() const { return spaceForHeap; }
void SetSpaceBeforeMinorGC(uintptr_t minorSize) { spaceBeforeMinorGC = minorSize; }
void SetSpaceForHeap(uintptr_t heapSize) { spaceForHeap = heapSize; }
uintptr_t CurrentAllocSpace() { return currentAllocSpace; }
uintptr_t AllocatedInAlloc();
uintptr_t CurrentHeapSize() { return currentHeapSize; }
uintptr_t DefaultSpaceSize() const { return defaultSpaceSize; }
void ReportHeapSizes(const char *phase);
// Profiling - Find a code object or return zero if not found.
PolyObject *FindCodeObject(const byte *addr);
// Profiling - Free bitmaps to indicate start of an object.
void RemoveProfilingBitmaps();
private:
bool AddLocalSpace(LocalMemSpace *space);
bool AddCodeSpace(CodeSpace *space);
uintptr_t reservedSpace;
unsigned nextAllocator;
// The default size in words when creating new segments.
uintptr_t defaultSpaceSize;
// The number of words that can be used for initial allocation.
uintptr_t spaceBeforeMinorGC;
// The number of words that can be used for the heap
uintptr_t spaceForHeap;
// The current sizes of the allocation space and the total heap size.
uintptr_t currentAllocSpace, currentHeapSize;
// LocalSpaceForAddress is a hot-spot so we use a B-tree to convert addresses;
SpaceTree *spaceTree;
PLock spaceTreeLock;
void AddTree(MemSpace *space) { AddTree(space, space->bottom, space->top); }
void RemoveTree(MemSpace *space) { RemoveTree(space, space->bottom, space->top); }
void AddTree(MemSpace *space, PolyWord *startS, PolyWord *endS);
void RemoveTree(MemSpace *space, PolyWord *startS, PolyWord *endS);
void AddTreeRange(SpaceTree **t, MemSpace *space, uintptr_t startS, uintptr_t endS);
void RemoveTreeRange(SpaceTree **t, MemSpace *space, uintptr_t startS, uintptr_t endS);
- OSMem osHeapAlloc, osStackAlloc, osCodeAlloc;
+#ifdef POLYML32IN64
+ OSMemInRegion osHeapAlloc, osStackAlloc, osCodeAlloc;
+#else
+ OSMemUnrestricted osHeapAlloc, osStackAlloc;
+#ifdef HOSTARCHITECTURE_X86_64
+ // For X86/64 put the code in a 2GB area so it is always
+ // possible to use 32-bit relative displacements.
+ OSMemInRegion osCodeAlloc;
+#else
+ OSMemUnrestricted osCodeAlloc;
+#endif
+
+#endif
};
extern MemMgr gMem;
#endif
diff --git a/libpolyml/objsize.cpp b/libpolyml/objsize.cpp
index cf95f129..4f2a1023 100644
--- a/libpolyml/objsize.cpp
+++ b/libpolyml/objsize.cpp
@@ -1,432 +1,432 @@
/*
Title: Object size
Copyright (c) 2000
Cambridge University Technical Services Limited
- Further development David C.J. Matthews 2016, 2017
+ Further development David C.J. Matthews 2016, 2017, 2021
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_STDIO_H
#include
#endif
#ifdef HAVE_SYS_TYPES_H
#include
#endif
#ifdef HAVE_STDLIB_H
#include
#endif
#ifdef HAVE_STRING_H
#include
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#include "globals.h"
#include "arb.h"
#include "run_time.h"
#include "machine_dep.h"
#include "objsize.h"
#include "scanaddrs.h"
#include "polystring.h"
#include "save_vec.h"
#include "bitmap.h"
#include "memmgr.h"
#include "mpoly.h"
#include "processes.h"
#include "rtsentry.h"
extern "C" {
POLYEXTERNALSYMBOL POLYUNSIGNED PolyObjSize(FirstArgument threadId, PolyWord obj);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowSize(FirstArgument threadId, PolyWord obj);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyObjProfile(FirstArgument threadId, PolyWord obj);
}
extern FILE *polyStdout;
#define MAX_PROF_LEN 100 // Profile lengths between 1 and this
class ProcessVisitAddresses: public ScanAddress
{
public:
virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt) { return ShowWord(*pt); }
virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { return ShowObject(*pt); }
virtual PolyObject *ScanObjectAddress(PolyObject *base);
POLYUNSIGNED ShowWord(PolyWord w) {
if (w.IsTagged() || w == PolyWord::FromUnsigned(0))
return 0;
else return ShowObject(w.AsObjPtr());
}
POLYUNSIGNED ShowObject(PolyObject *p);
ProcessVisitAddresses(bool show);
~ProcessVisitAddresses();
VisitBitmap *FindBitmap(PolyObject *p);
void ShowBytes(PolyObject *start);
void ShowCode(PolyObject *start);
void ShowWords(PolyObject *start);
POLYUNSIGNED total_length;
bool show_size;
VisitBitmap **bitmaps;
unsigned nBitmaps;
// Counts of objects of each size for mutable and immutable data.
unsigned iprofile[MAX_PROF_LEN+1];
unsigned mprofile[MAX_PROF_LEN+1];
};
ProcessVisitAddresses::ProcessVisitAddresses(bool show)
{
// Need to get the allocation lock here. Another thread
// could allocate new local areas resulting in gMem.nlSpaces
// and gMem.lSpaces changing under our feet.
PLocker lock(&gMem.allocLock);
total_length = 0;
show_size = show;
// Create a bitmap for each of the areas apart from the IO area
nBitmaps = (unsigned)(gMem.lSpaces.size()+gMem.pSpaces.size()+gMem.cSpaces.size()); //
bitmaps = new VisitBitmap*[nBitmaps];
unsigned bm = 0;
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
{
MemSpace *space = *i;
// Permanent areas are filled with objects from the bottom.
bitmaps[bm++] = new VisitBitmap(space->bottom, space->top);
}
for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
{
LocalMemSpace *space = *i;
bitmaps[bm++] = new VisitBitmap(space->bottom, space->top);
}
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
{
CodeSpace *space = *i;
bitmaps[bm++] = new VisitBitmap(space->bottom, space->top);
}
ASSERT(bm == nBitmaps);
// Clear the profile counts.
for (unsigned i = 0; i < MAX_PROF_LEN+1; i++)
{
iprofile[i] = mprofile[i] = 0;
}
}
ProcessVisitAddresses::~ProcessVisitAddresses()
{
if (bitmaps)
{
for (unsigned i = 0; i < nBitmaps; i++)
delete(bitmaps[i]);
delete[](bitmaps);
}
}
// Return the bitmap corresponding to the address or NULL if it isn't there.
VisitBitmap *ProcessVisitAddresses::FindBitmap(PolyObject *p)
{
for (unsigned i = 0; i < nBitmaps; i++)
{
VisitBitmap *bm = bitmaps[i];
if (bm->InRange((PolyWord*)p)) return bm;
}
return 0;
}
void ProcessVisitAddresses::ShowBytes(PolyObject *start)
{
POLYUNSIGNED bytes = start->Length() * sizeof(PolyWord);
char *array = (char *) start;
putc('\n', polyStdout);
if (start->IsMutable()) fprintf(polyStdout, "MUTABLE ");
fprintf(polyStdout, "BYTES:%p:%" POLYUFMT "\n", array, bytes);
POLYUNSIGNED i, n;
for (i = 0, n = 0; n < bytes; n++)
{
fprintf(polyStdout, "%02x ",array[n] & 0xff);
i++;
if (i == 16)
{
putc('\n', polyStdout);
i = 0;
}
}
if (i != 0) putc('\n', polyStdout);
}
#define MAXNAME 500
void ProcessVisitAddresses::ShowCode(PolyObject *start)
{
POLYUNSIGNED length = start->Length();
putc('\n', polyStdout);
if (start->IsMutable()) fprintf(polyStdout, "MUTABLE ");
char buffer[MAXNAME+1];
- PolyWord *consts = start->ConstPtrForCode();
+ PolyWord *consts = machineDependent->ConstPtrForCode(start);
PolyWord string = consts[0];
if (string == TAGGED(0))
strcpy(buffer, "");
else
(void) Poly_string_to_C(string, buffer, sizeof(buffer));
fprintf(polyStdout, "CODE:%p:%" POLYUFMT " %s\n", start, length, buffer);
POLYUNSIGNED i, n;
for (i = 0, n = 0; n < length; n++)
{
if (i != 0) putc('\t', polyStdout);
fprintf(polyStdout, "%8p ", start->Get(n).AsObjPtr());
i++;
if (i == 4)
{
putc('\n', polyStdout);
i = 0;
}
}
if (i != 0) putc('\n', polyStdout);
}
void ProcessVisitAddresses::ShowWords(PolyObject *start)
{
POLYUNSIGNED length = start->Length();
putc('\n', polyStdout);
if (start->IsMutable()) fprintf(polyStdout, "MUTABLE ");
fprintf(polyStdout, "%s:%p:%" POLYUFMT "\n",
start->IsClosureObject() ? "CLOSURE" : "WORDS", start, length);
POLYUNSIGNED i, n;
for (i = 0, n = 0; n < length; )
{
if (i != 0)
putc('\t', polyStdout);
if (start->IsClosureObject() && n == 0)
{
fprintf(polyStdout, "%8p ", *(PolyObject**)start);
n += sizeof(PolyObject*) / sizeof(PolyWord);
}
else
{
PolyWord p = start->Get(n);
if (p.IsTagged())
fprintf(polyStdout, "%08" POLYUFMT " ", p.AsUnsigned());
else fprintf(polyStdout, "%8p ", p.AsObjPtr());
n++;
}
i++;
if (i == 4)
{
putc('\n', polyStdout);
i = 0;
}
}
if (i != 0)
putc('\n', polyStdout);
}
// This is called initially to print the top-level object.
// Since we don't process stacks it probably doesn't get called elsewhere.
PolyObject *ProcessVisitAddresses::ScanObjectAddress(PolyObject *base)
{
POLYUNSIGNED lengthWord = ShowWord(base);
if (lengthWord)
ScanAddressesInObject(base, lengthWord);
return base;
}
// Handle the normal case. Print the object at this word and
// return true is it must be handled recursively.
POLYUNSIGNED ProcessVisitAddresses::ShowObject(PolyObject *p)
{
VisitBitmap *bm = FindBitmap(p);
if (bm == 0)
{
fprintf(polyStdout, "Bad address " ZERO_X "%p found\n", p);
return 0;
}
/* Have we already visited this object? */
if (bm->AlreadyVisited(p))
return 0;
bm->SetVisited(p);
POLYUNSIGNED obj_length = p->Length();
// Increment the appropriate size profile count.
if (p->IsMutable())
{
if (obj_length > MAX_PROF_LEN)
mprofile[MAX_PROF_LEN]++;
else
mprofile[obj_length]++;
}
else
{
if (obj_length > MAX_PROF_LEN)
iprofile[MAX_PROF_LEN]++;
else
iprofile[obj_length]++;
}
total_length += obj_length + 1; /* total space needed for object */
if (p->IsByteObject())
{
if (show_size)
ShowBytes(p);
return 0;
}
else if (p->IsCodeObject())
{
PolyWord *cp;
POLYUNSIGNED const_count;
- p->GetConstSegmentForCode(cp, const_count);
+ machineDependent->GetConstSegmentForCode(p, cp, const_count);
if (show_size)
ShowCode(p);
return p->LengthWord(); // Process addresses in it.
}
else // Word or closure object
{
if (show_size)
ShowWords(p);
return p->LengthWord(); // Process addresses in it.
}
}
Handle ObjSize(TaskData *taskData, Handle obj)
{
ProcessVisitAddresses process(false);
process.ScanObjectAddress(obj->WordP());
return Make_arbitrary_precision(taskData, process.total_length);
}
Handle ShowSize(TaskData *taskData, Handle obj)
{
ProcessVisitAddresses process(true);
process.ScanObjectAddress(obj->WordP());
fflush(polyStdout); /* We need this for Windows at least. */
return Make_arbitrary_precision(taskData, process.total_length);
}
static void printfprof(unsigned *counts)
{
for(unsigned i = 0; i < MAX_PROF_LEN+1; i++)
{
if (counts[i] != 0)
{
if (i == MAX_PROF_LEN)
fprintf(polyStdout, ">%d\t%u\n", MAX_PROF_LEN, counts[i]);
else
fprintf(polyStdout, "%d\t%u\n", i, counts[i]);
}
}
}
POLYUNSIGNED PolyObjSize(FirstArgument threadId, PolyWord obj)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
ProcessVisitAddresses process(false);
if (!obj.IsTagged()) process.ScanObjectAddress(obj.AsObjPtr());
Handle result = Make_arbitrary_precision(taskData, process.total_length);
taskData->PostRTSCall();
return result->Word().AsUnsigned();
}
POLYUNSIGNED PolyShowSize(FirstArgument threadId, PolyWord obj)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
ProcessVisitAddresses process(true);
if (!obj.IsTagged()) process.ScanObjectAddress(obj.AsObjPtr());
fflush(polyStdout); /* We need this for Windows at least. */
Handle result = Make_arbitrary_precision(taskData, process.total_length);
taskData->PostRTSCall();
return result->Word().AsUnsigned();
}
POLYUNSIGNED PolyObjProfile(FirstArgument threadId, PolyWord obj)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
ProcessVisitAddresses process(false);
if (!obj.IsTagged()) process.ScanObjectAddress(obj.AsObjPtr());
fprintf(polyStdout, "\nImmutable object sizes and counts\n");
printfprof(process.iprofile);
fprintf(polyStdout, "\nMutable object sizes and counts\n");
printfprof(process.mprofile);
fflush(polyStdout); /* We need this for Windows at least. */
Handle result = Make_arbitrary_precision(taskData, process.total_length);
taskData->PostRTSCall();
return result->Word().AsUnsigned();
}
struct _entrypts objSizeEPT[] =
{
{ "PolyObjSize", (polyRTSFunction)&PolyObjSize},
{ "PolyShowSize", (polyRTSFunction)&PolyShowSize},
{ "PolyObjProfile", (polyRTSFunction)&PolyObjProfile},
{ NULL, NULL} // End of list.
};
diff --git a/libpolyml/osmem.h b/libpolyml/osmem.h
index 4d8fb551..1e2c440a 100644
--- a/libpolyml/osmem.h
+++ b/libpolyml/osmem.h
@@ -1,105 +1,143 @@
/*
Title: osomem.h - Interface to OS memory management
Copyright (c) 2006, 2017-18, 2020 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef OS_MEM_H_INCLUDED
#define OS_MEM_H_INCLUDED
// We need size_t so include these two here.
#ifdef HAVE_STRING_H
#include
#endif
#ifdef HAVE_STDLIB_H
#include
#endif
-#ifdef POLYML32IN64
#include "bitmap.h"
-#endif
-
#include "locking.h"
// This class provides access to the memory management provided by the
// operating system. It would be nice if we could always use malloc and
// free for this but we need to have execute permission on the code
// objects.
class OSMem {
public:
- OSMem();
- virtual ~OSMem();
+ OSMem() {}
+ virtual ~OSMem() {}
enum _MemUsage {
UsageData, // Data or code in the interpreted version
UsageStack, // Stack
UsageExecutableCode // Code in the native code versions.
};
- bool Initialise(enum _MemUsage usage, size_t space = 0, void** pBase = 0);
-
// Allocate space and return a pointer to it. The size is the minimum
// size requested in bytes and it is updated with the actual space allocated.
// Returns NULL if it cannot allocate the space.
- void *AllocateDataArea(size_t& bytes);
+ virtual void *AllocateDataArea(size_t& bytes) = 0;
// Release the space previously allocated. This must free the whole of
// the segment. The space must be the size actually allocated.
- bool FreeDataArea(void* p, size_t space);
+ virtual bool FreeDataArea(void* p, size_t space) = 0;
// Enable/disable writing. This must apply to the whole of a segment.
// Only for data areas.
- bool EnableWrite(bool enable, void* p, size_t space);
+ virtual bool EnableWrite(bool enable, void* p, size_t space) = 0;
// Allocate code area. Some systems will not allow both write and execute permissions
// on the same page. On those systems we have to allocate two regions of shared memory,
// one with read+execute permission and the other with read+write.
- void *AllocateCodeArea(size_t& bytes, void*& shadowArea);
+ virtual void *AllocateCodeArea(size_t& bytes, void*& shadowArea) = 0;
// Free the allocated areas.
- bool FreeCodeArea(void* codeAddr, void* dataAddr, size_t space);
+ virtual bool FreeCodeArea(void* codeAddr, void* dataAddr, size_t space) = 0;
// Remove write access. This is used after the permanent code area has been created
// either from importing a portable export file or copying the area in 32-in-64.
- bool DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space);
+ virtual bool DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space) = 0;
protected:
size_t pageSize;
enum _MemUsage memUsage;
+};
+
+// Allows the system to allocate pages.
+class OSMemUnrestricted: public OSMem {
+
+public:
+ OSMemUnrestricted();
+ virtual ~OSMemUnrestricted();
+
+ bool Initialise(enum _MemUsage usage);
+ virtual void* AllocateDataArea(size_t& bytes);
+ virtual bool FreeDataArea(void* p, size_t space);
+ virtual bool EnableWrite(bool enable, void* p, size_t space);
+ virtual void* AllocateCodeArea(size_t& bytes, void*& shadowArea);
+ virtual bool FreeCodeArea(void* codeAddr, void* dataAddr, size_t space);
+ virtual bool DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space);
+
+protected:
+
+#ifndef _WIN32
+ // If we need to use dual areas because WRITE+EXECUTE permission is not allowed.
+ int shadowFd;
+ PLock allocLock;
+ size_t allocPtr;
+#endif
+
+};
+
+// Pages are allocated within a region. This is used for 32-in-64 and
+// for code in native X86 64-bit.
+class OSMemInRegion: public OSMem {
+
+public:
+ OSMemInRegion();
+ virtual ~OSMemInRegion();
+
+ bool Initialise(enum _MemUsage usage, size_t space, void** pBase);
+ virtual void* AllocateDataArea(size_t& bytes);
+ virtual bool FreeDataArea(void* p, size_t space);
+ virtual bool EnableWrite(bool enable, void* p, size_t space);
+ virtual void* AllocateCodeArea(size_t& bytes, void*& shadowArea);
+ virtual bool FreeCodeArea(void* codeAddr, void* dataAddr, size_t space);
+ virtual bool DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space);
+
+protected:
+
#ifndef _WIN32
// If we need to use dual areas because WRITE+EXECUTE permission is not allowed.
int shadowFd;
PLock allocLock;
size_t allocPtr;
#endif
-#ifdef POLYML32IN64
Bitmap pageMap;
uintptr_t lastAllocated;
- char* memBase, *shadowBase;
+ char* memBase, * shadowBase;
PLock bitmapLock;
-#endif
-
};
#endif
diff --git a/libpolyml/osmemunix.cpp b/libpolyml/osmemunix.cpp
index 36cbecdc..c431a0d5 100644
--- a/libpolyml/osmemunix.cpp
+++ b/libpolyml/osmemunix.cpp
@@ -1,500 +1,496 @@
/*
Title: osomem.cpp - Interface to OS memory management - Unix version
Copyright (c) 2006, 2017-18, 2020-21 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#else
#error "No configuration file"
#endif
#if defined __linux__ && !defined _GNU_SOURCE
// _GNU_SOURCE must be defined before #include to get O_TEMPFILE etc.
#define _GNU_SOURCE 1
#endif
#ifdef HAVE_SYS_TYPES_H
#include
#endif
#ifdef HAVE_SYS_MMAN_H
#include
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#ifdef HAVE_UNISTD_H
#include
#endif
#ifdef HAVE_SYS_PARAM_H
#include
#endif
#ifdef HAVE_ERRNO_H
#include
#endif
#ifdef HAVE_STDLIB_H
#include
#endif
#ifdef HAVE_SYS_STAT_H
#include
#endif
#ifdef HAVE_FCNTL_H
#include
#endif
// Linux prefers MAP_ANONYMOUS to MAP_ANON
#ifndef MAP_ANON
#ifdef MAP_ANONYMOUS
#define MAP_ANON MAP_ANONYMOUS
#endif
#endif
// Assume that mmap is supported. If it isn't we can't run.
#include "osmem.h"
#include "bitmap.h"
#include "locking.h"
#include "polystring.h" // For TempCString
// How do we get the page size?
#ifndef HAVE_GETPAGESIZE
#ifdef _SC_PAGESIZE
#define getpagesize() sysconf(_SC_PAGESIZE)
#else
// If this fails we're stuck
#define getpagesize() PAGESIZE
#endif
#endif
#ifdef SOLARIS
#define FIXTYPE (caddr_t)
#else
#define FIXTYPE
#endif
// MAP_JIT is needed on Mac OS with hardened kernel
#ifndef MAP_JIT
#define MAP_JIT 0
#endif
// Open a temporary file, unlink it and return the file descriptor.
static int openTmpFile(const char* dirName)
{
#ifdef O_TMPFILE
int flags = 0;
#ifdef O_CLOEXEC
flags |= O_CLOEXEC;
#endif
int tfd = open(dirName, flags | O_TMPFILE | O_RDWR | O_EXCL, 0700);
if (tfd != -1)
return tfd;
#endif
const char* template_subdir = "/mlMapXXXXXX";
TempString buff((char*)malloc(strlen(dirName) + strlen(template_subdir) + 1));
if (buff == 0) return -1; // Unable to allocate
strcpy(buff, dirName);
strcat(buff, template_subdir);
int fd = mkstemp(buff);
if (fd == -1) return -1;
unlink(buff);
return fd;
}
static int createTemporaryFile()
{
char *tmpDir = getenv("TMPDIR");
int fd;
if (tmpDir != NULL)
{
fd = openTmpFile(tmpDir);
if (fd != -1) return fd;
}
#ifdef P_tmpdir
fd = openTmpFile(P_tmpdir);
if (fd != -1) return fd;
#endif
fd = openTmpFile("/tmp");
if (fd != -1) return fd;
fd = openTmpFile("/var/tmp");
if (fd != -1) return fd;
return -1;
}
-#ifdef POLYML32IN64
-OSMem::OSMem()
+OSMemInRegion::OSMemInRegion()
{
memBase = 0;
shadowFd = -1;
}
-OSMem::~OSMem()
+OSMemInRegion::~OSMemInRegion()
{
+ if (shadowFd != -1) close(shadowFd);
}
-bool OSMem::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void** pBase /* = 0 */)
+bool OSMemInRegion::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void** pBase /* = 0 */)
{
memUsage = usage;
pageSize = getpagesize();
bool simpleMmap;
if (usage != UsageExecutableCode) simpleMmap = true;
else
{
// Can we allocate memory with write+execute?
void *test = mmap(0, pageSize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON|MAP_JIT, -1, 0);
if (test != MAP_FAILED)
{
munmap(FIXTYPE test, pageSize);
simpleMmap = true;
}
else simpleMmap = false;
}
if (simpleMmap)
{
// Don't require shadow area. Can use mmap
int flags = MAP_PRIVATE | MAP_ANON;
if (usage == UsageExecutableCode) flags |= MAP_JIT;
memBase = (char*)mmap(0, space, PROT_NONE, flags, -1, 0);
if (memBase == MAP_FAILED) return false;
// We need the heap to be such that the top 32-bits are non-zero.
if ((uintptr_t)memBase < ((uintptr_t)1 << 32))
{
// Allocate again.
void* newSpace = mmap(0, space, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
munmap(FIXTYPE memBase, space); // Free the old area that isn't suitable.
// Return what we got, or zero if it failed.
memBase = (char*)newSpace;
}
shadowBase = memBase;
}
else
{
// More difficult - require file mapping
shadowFd = createTemporaryFile();
if (shadowFd == -1) return false;
if (ftruncate(shadowFd, space) == -1) return false;
void *readWrite = mmap(0, space, PROT_NONE, MAP_SHARED, shadowFd, 0);
if (readWrite == MAP_FAILED) return 0;
memBase = (char*)mmap(0, space, PROT_NONE, MAP_SHARED, shadowFd, 0);
if (memBase == MAP_FAILED)
{
munmap(FIXTYPE readWrite, space);
return false;
}
// This should be above 32-bits.
ASSERT((uintptr_t)memBase >= ((uintptr_t)1 << 32));
shadowBase = (char*)readWrite;
}
if (pBase != 0) *pBase = memBase;
// Create a bitmap with a bit for each page.
if (!pageMap.Create(space / pageSize))
return false;
lastAllocated = space / pageSize; // Beyond the last page in the area
// Set the last bit in the area so that we don't use it.
// This is effectively a work-around for a problem with the heap.
// If we have a zero-sized cell at the end of the memory its address is
// going to be zero. This causes problems with forwarding pointers.
// There may be better ways of doing this.
pageMap.SetBit(space / pageSize - 1);
return true;
}
-void* OSMem::AllocateDataArea(size_t& space)
+void* OSMemInRegion::AllocateDataArea(size_t& space)
{
char* baseAddr;
{
PLocker l(&bitmapLock);
uintptr_t pages = (space + pageSize - 1) / pageSize;
// Round up to an integral number of pages.
space = pages * pageSize;
// Find some space
while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area.
lastAllocated--;
uintptr_t free = pageMap.FindFree(0, lastAllocated, pages);
if (free == lastAllocated)
return 0; // Can't find the space.
pageMap.SetBits(free, pages);
// TODO: Do we need to zero this? It may have previously been set.
baseAddr = memBase + free * pageSize;
}
int prot = PROT_READ | PROT_WRITE;
int flags = MAP_FIXED | MAP_PRIVATE | MAP_ANON;
#if defined(MAP_STACK) && defined(__OpenBSD__)
// On OpenBSD the stack must be mapped with MAP_STACK otherwise it
// segfaults. On FreeBSD, though, this isn't necessary and causes problems.
if (memUsage == UsageStack) flags |= MAP_STACK;
#endif
if (mmap(baseAddr, space, prot, flags, -1, 0) == MAP_FAILED)
return 0;
msync(baseAddr, space, MS_SYNC | MS_INVALIDATE);
return baseAddr;
}
-bool OSMem::FreeDataArea(void* p, size_t space)
+bool OSMemInRegion::FreeDataArea(void* p, size_t space)
{
char* addr = (char*)p;
uintptr_t offset = (addr - memBase) / pageSize;
// Remap the pages as new entries. This should remove the old versions.
if (mmap(p, space, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == MAP_FAILED)
return false;
msync(p, space, MS_SYNC | MS_INVALIDATE);
uintptr_t pages = space / pageSize;
{
PLocker l(&bitmapLock);
pageMap.ClearBits(offset, pages);
if (offset + pages > lastAllocated) // We allocate from the top down.
lastAllocated = offset + pages;
}
return true;
}
-void* OSMem::AllocateCodeArea(size_t& space, void*& shadowArea)
+void* OSMemInRegion::AllocateCodeArea(size_t& space, void*& shadowArea)
{
uintptr_t offset;
{
PLocker l(&bitmapLock);
uintptr_t pages = (space + pageSize - 1) / pageSize;
// Round up to an integral number of pages.
space = pages * pageSize;
// Find some space
while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area.
lastAllocated--;
uintptr_t free = pageMap.FindFree(0, lastAllocated, pages);
if (free == lastAllocated)
return 0; // Can't find the space.
pageMap.SetBits(free, pages);
offset = free * pageSize;
}
if (shadowFd == -1)
{
char *baseAddr = memBase + offset;
int prot = PROT_READ | PROT_WRITE;
if (memUsage == UsageExecutableCode) prot |= PROT_EXEC;
if (mprotect(baseAddr, space, prot) != 0)
return 0;
msync(baseAddr, space, MS_SYNC | MS_INVALIDATE);
shadowArea = baseAddr;
return baseAddr;
}
else
{
char *baseAddr = memBase + offset;
char *readWriteArea = shadowBase + offset;
if (mmap(baseAddr, space, PROT_READ|PROT_EXEC, MAP_FIXED | MAP_SHARED, shadowFd, offset) == MAP_FAILED)
return 0;
msync(baseAddr, space, MS_SYNC | MS_INVALIDATE);
if (mmap(readWriteArea, space, PROT_READ|PROT_WRITE, MAP_FIXED | MAP_SHARED, shadowFd, offset) == MAP_FAILED)
return 0;
msync(readWriteArea, space, MS_SYNC | MS_INVALIDATE);
shadowArea = readWriteArea;
return baseAddr;
}
}
-bool OSMem::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space)
+bool OSMemInRegion::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space)
{
// Free areas by mapping them with PROT_NONE.
uintptr_t offset = ((char*)codeAddr - memBase) / pageSize;
if (shadowFd == -1)
{
mmap(codeAddr, space, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
msync(codeAddr, space, MS_SYNC | MS_INVALIDATE);
}
else
{
mmap(codeAddr, space, PROT_NONE, MAP_SHARED, shadowFd, offset);
msync(codeAddr, space, MS_SYNC | MS_INVALIDATE);
mmap(dataAddr, space, PROT_NONE, MAP_SHARED, shadowFd, offset);
msync(dataAddr, space, MS_SYNC | MS_INVALIDATE);
}
uintptr_t pages = space / pageSize;
{
PLocker l(&bitmapLock);
pageMap.ClearBits(offset, pages);
if (offset + pages > lastAllocated) // We allocate from the top down.
lastAllocated = offset + pages;
}
return true;
}
-bool OSMem::EnableWrite(bool enable, void* p, size_t space)
+bool OSMemInRegion::EnableWrite(bool enable, void* p, size_t space)
{
int res = mprotect(FIXTYPE p, space, enable ? PROT_READ|PROT_WRITE: PROT_READ);
return res != -1;
}
-bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
+bool OSMemInRegion::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
{
int prot = PROT_READ;
if (memUsage == UsageExecutableCode) prot |= PROT_EXEC;
int res = mprotect(FIXTYPE codeAddr, space, prot);
return res != -1;
}
-#else
-
// Native address versions
-OSMem::OSMem()
+OSMemUnrestricted::OSMemUnrestricted()
{
allocPtr = 0;
shadowFd = -1;
}
-OSMem::~OSMem()
+OSMemUnrestricted::~OSMemUnrestricted()
{
if (shadowFd != -1) close(shadowFd);
}
-bool OSMem::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void **pBase /* = 0 */)
+bool OSMemUnrestricted::Initialise(enum _MemUsage usage)
{
memUsage = usage;
pageSize = getpagesize();
if (usage != UsageExecutableCode) return true;
// Can we allocate memory with write+execute?
void *test = mmap(0, pageSize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_JIT|MAP_PRIVATE|MAP_ANON, -1, 0);
if (test != MAP_FAILED)
{
// Don't require shadow area
munmap(FIXTYPE test, pageSize);
return true;
}
if (errno != ENOTSUP && errno != EACCES) // Fails with ENOTSUPP on OpenBSD and EACCES in SELinux.
return false;
// Check that read-write works.
test = mmap(0, pageSize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
if (test == MAP_FAILED)
return false; // There's a problem.
munmap(FIXTYPE test, pageSize);
// Need to create a file descriptor for mapping.
shadowFd = createTemporaryFile();
if (shadowFd != -1) return true;
return false;
}
// Allocate space and return a pointer to it. The size is the minimum
// size requested and it is updated with the actual space allocated.
// Returns NULL if it cannot allocate the space.
-void *OSMem::AllocateDataArea(size_t &space)
+void *OSMemUnrestricted::AllocateDataArea(size_t &space)
{
// Round up to an integral number of pages.
space = (space + pageSize-1) & ~(pageSize-1);
int fd = -1; // This value is required by FreeBSD. Linux doesn't care
int flags = MAP_PRIVATE | MAP_ANON;
#if defined(MAP_STACK) && defined(__OpenBSD__)
// On OpenBSD the stack must be mapped with MAP_STACK otherwise it
// segfaults. On FreeBSD, though, this isn't necessary and causes problems.
if (memUsage == UsageStack) flags |= MAP_STACK;
#endif
void *result = mmap(0, space, PROT_READ|PROT_WRITE, flags, fd, 0);
// Convert MAP_FAILED (-1) into NULL
if (result == MAP_FAILED)
return 0;
return result;
}
// Release the space previously allocated. This must free the whole of
// the segment. The space must be the size actually allocated.
-bool OSMem::FreeDataArea(void *p, size_t space)
+bool OSMemUnrestricted::FreeDataArea(void *p, size_t space)
{
return munmap(FIXTYPE p, space) == 0;
}
-bool OSMem::EnableWrite(bool enable, void* p, size_t space)
+bool OSMemUnrestricted::EnableWrite(bool enable, void* p, size_t space)
{
int res = mprotect(FIXTYPE p, space, enable ? PROT_READ|PROT_WRITE: PROT_READ);
return res != -1;
}
-void *OSMem::AllocateCodeArea(size_t &space, void*& shadowArea)
+void *OSMemUnrestricted::AllocateCodeArea(size_t &space, void*& shadowArea)
{
// Round up to an integral number of pages.
space = (space + pageSize-1) & ~(pageSize-1);
if (shadowFd == -1)
{
int fd = -1; // This value is required by FreeBSD. Linux doesn't care
int prot = PROT_READ | PROT_WRITE;
int flags = MAP_PRIVATE|MAP_ANON;
if (memUsage == UsageExecutableCode)
{
prot |= PROT_EXEC;
flags |= MAP_JIT;
}
void *result = mmap(0, space, prot, flags, fd, 0);
// Convert MAP_FAILED (-1) into NULL
if (result == MAP_FAILED)
return 0;
shadowArea = result;
return result;
}
// Have to use dual areas.
size_t allocAt;
{
PLocker lock(&allocLock);
allocAt = allocPtr;
allocPtr += space;
}
if (ftruncate(shadowFd, allocAt + space) == -1)
return 0;
void *readExec = mmap(0, space, PROT_READ|PROT_EXEC, MAP_SHARED, shadowFd, allocAt);
if (readExec == MAP_FAILED)
return 0;
void *readWrite = mmap(0, space, PROT_READ|PROT_WRITE, MAP_SHARED, shadowFd, allocAt);
if (readWrite == MAP_FAILED)
{
munmap(FIXTYPE readExec, space);
return 0;
}
shadowArea = readWrite;
return readExec;
}
-bool OSMem::FreeCodeArea(void *codeArea, void *dataArea, size_t space)
+bool OSMemUnrestricted::FreeCodeArea(void *codeArea, void *dataArea, size_t space)
{
bool freeCode = munmap(FIXTYPE codeArea, space) == 0;
if (codeArea == dataArea) return freeCode;
return (munmap(FIXTYPE dataArea, space) == 0) & freeCode;
}
-bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
+bool OSMemUnrestricted::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
{
int prot = PROT_READ;
if (memUsage == UsageExecutableCode) prot |= PROT_EXEC;
int res = mprotect(FIXTYPE codeAddr, space, prot);
return res != -1;
}
-
-#endif
diff --git a/libpolyml/osmemwin.cpp b/libpolyml/osmemwin.cpp
index 3aced6a4..302c4aca 100644
--- a/libpolyml/osmemwin.cpp
+++ b/libpolyml/osmemwin.cpp
@@ -1,256 +1,245 @@
/*
Title: osomem.cpp - Interface to OS memory management - Windows version
Copyright (c) 2006, 2017-18, 2020 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#include "osmem.h"
#include "bitmap.h"
#include "locking.h"
// Use Windows memory management.
#include
-#ifdef POLYML32IN64
-OSMem::OSMem()
+OSMemInRegion::OSMemInRegion(): memBase(0)
{
- memBase = 0;
}
-OSMem::~OSMem()
+OSMemInRegion::~OSMemInRegion()
{
}
-bool OSMem::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void** pBase /* = 0 */)
+bool OSMemInRegion::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void** pBase /* = 0 */)
{
memUsage = usage;
// Get the page size and round up to that multiple.
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
// Get the page size. Put it in a size_t variable otherwise the rounding
// up of "space" may go wrong on 64-bits.
pageSize = sysInfo.dwPageSize;
memBase = (char*)VirtualAlloc(0, space, MEM_RESERVE, PAGE_NOACCESS);
if (memBase == 0) return 0;
// We need the heap to be such that the top 32-bits are non-zero.
if ((uintptr_t)memBase < ((uintptr_t)1 << 32))
{
// Allocate again.
void* newSpace = VirtualAlloc(0, space, MEM_RESERVE, PAGE_NOACCESS);
VirtualFree(memBase, 0, MEM_RELEASE); // Free the old area that isn't suitable.
// Return what we got, or zero if it failed.
memBase = (char*)newSpace;
}
if (pBase != 0) *pBase = memBase;
// Create a bitmap with a bit for each page.
if (!pageMap.Create(space / pageSize))
return false;
lastAllocated = space / pageSize; // Beyond the last page in the area
// Set the last bit in the area so that we don't use it.
// This is effectively a work-around for a problem with the heap.
// If we have a zero-sized cell at the end of the memory its address is
// going to be zero. This causes problems with forwarding pointers.
// There may be better ways of doing this.
pageMap.SetBit(space / pageSize - 1);
return true;
}
-void* OSMem::AllocateDataArea(size_t& space)
+void* OSMemInRegion::AllocateDataArea(size_t& space)
{
char* baseAddr;
{
PLocker l(&bitmapLock);
uintptr_t pages = (space + pageSize - 1) / pageSize;
// Round up to an integral number of pages.
space = pages * pageSize;
// Find some space
while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area.
lastAllocated--;
uintptr_t free = pageMap.FindFree(0, lastAllocated, pages);
if (free == lastAllocated)
return 0; // Can't find the space.
pageMap.SetBits(free, pages);
// TODO: Do we need to zero this? It may have previously been set.
baseAddr = memBase + free * pageSize;
}
return VirtualAlloc(baseAddr, space, MEM_COMMIT, PAGE_READWRITE);
}
-bool OSMem::FreeDataArea(void* p, size_t space)
+bool OSMemInRegion::FreeDataArea(void* p, size_t space)
{
char* addr = (char*)p;
uintptr_t offset = (addr - memBase) / pageSize;
if (!VirtualFree(p, space, MEM_DECOMMIT))
return false;
uintptr_t pages = space / pageSize;
{
PLocker l(&bitmapLock);
pageMap.ClearBits(offset, pages);
if (offset + pages > lastAllocated) // We allocate from the top down.
lastAllocated = offset + pages;
}
return true;
}
-void* OSMem::AllocateCodeArea(size_t& space, void*& shadowArea)
+void* OSMemInRegion::AllocateCodeArea(size_t& space, void*& shadowArea)
{
char* baseAddr;
{
PLocker l(&bitmapLock);
uintptr_t pages = (space + pageSize - 1) / pageSize;
// Round up to an integral number of pages.
space = pages * pageSize;
// Find some space
while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area.
lastAllocated--;
uintptr_t free = pageMap.FindFree(0, lastAllocated, pages);
if (free == lastAllocated)
return 0; // Can't find the space.
pageMap.SetBits(free, pages);
// TODO: Do we need to zero this? It may have previously been set.
baseAddr = memBase + free * pageSize;
}
void* dataArea =
VirtualAlloc(baseAddr, space, MEM_COMMIT, memUsage == UsageExecutableCode ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
shadowArea = dataArea;
return dataArea;
}
-bool OSMem::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space)
+bool OSMemInRegion::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space)
{
ASSERT(codeAddr == dataAddr);
char* addr = (char*)codeAddr;
uintptr_t offset = (addr - memBase) / pageSize;
if (! VirtualFree(codeAddr, space, MEM_DECOMMIT))
return false;
uintptr_t pages = space / pageSize;
{
PLocker l(&bitmapLock);
pageMap.ClearBits(offset, pages);
if (offset + pages > lastAllocated) // We allocate from the top down.
lastAllocated = offset + pages;
}
return true;
}
-bool OSMem::EnableWrite(bool enable, void* p, size_t space)
+bool OSMemInRegion::EnableWrite(bool enable, void* p, size_t space)
{
DWORD oldProtect;
return VirtualProtect(p, space, enable ? PAGE_READWRITE : PAGE_READONLY, &oldProtect) == TRUE;
}
-bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
+bool OSMemInRegion::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
{
ASSERT(codeAddr == dataAddr);
DWORD oldProtect;
return VirtualProtect(codeAddr, space,
memUsage == UsageExecutableCode ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldProtect) == TRUE;
}
-#else
+// These are needed in Unix but not in Windows.
+OSMemUnrestricted::OSMemUnrestricted() {}
-// Native address versions
-OSMem::OSMem()
-{
-}
+OSMemUnrestricted::~OSMemUnrestricted() {}
-OSMem::~OSMem()
-{
-}
-
-bool OSMem::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void **pBase /* = 0 */)
+bool OSMemUnrestricted::Initialise(enum _MemUsage usage)
{
memUsage = usage;
// Get the page size and round up to that multiple.
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
// Get the page size. Put it in a size_t variable otherwise the rounding
// up of "space" may go wrong on 64-bits.
pageSize = sysInfo.dwPageSize;
return true;
}
// Allocate space and return a pointer to it. The size is the minimum
// size requested and it is updated with the actual space allocated.
// Returns NULL if it cannot allocate the space.
-void *OSMem::AllocateDataArea(size_t &space)
+void *OSMemUnrestricted::AllocateDataArea(size_t &space)
{
space = (space + pageSize - 1) & ~(pageSize - 1);
DWORD options = MEM_RESERVE | MEM_COMMIT;
return VirtualAlloc(0, space, options, PAGE_READWRITE);
}
// Release the space previously allocated. This must free the whole of
// the segment. The space must be the size actually allocated.
-bool OSMem::FreeDataArea(void *p, size_t space)
+bool OSMemUnrestricted::FreeDataArea(void *p, size_t space)
{
return VirtualFree(p, 0, MEM_RELEASE) == TRUE;
}
// Adjust the permissions on a segment. This must apply to the
// whole of a segment.
-bool OSMem::EnableWrite(bool enable, void* p, size_t space)
+bool OSMemUnrestricted::EnableWrite(bool enable, void* p, size_t space)
{
DWORD oldProtect;
return VirtualProtect(p, space, enable ? PAGE_READWRITE: PAGE_READONLY, &oldProtect) == TRUE;
}
-void* OSMem::AllocateCodeArea(size_t& space, void*& shadowArea)
+void* OSMemUnrestricted::AllocateCodeArea(size_t& space, void*& shadowArea)
{
space = (space + pageSize - 1) & ~(pageSize - 1);
DWORD options = MEM_RESERVE | MEM_COMMIT;
void * dataAddr = VirtualAlloc(0, space, options,
memUsage == UsageExecutableCode ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
shadowArea = dataAddr;
return dataAddr;
}
-bool OSMem::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space)
+bool OSMemUnrestricted::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space)
{
ASSERT(codeAddr == dataAddr);
return VirtualFree(codeAddr, 0, MEM_RELEASE) == TRUE;
}
-bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
+bool OSMemUnrestricted::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space)
{
ASSERT(codeAddr == dataAddr);
DWORD oldProtect;
return VirtualProtect(codeAddr, space,
memUsage == UsageExecutableCode ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldProtect) == TRUE;
}
-
-#endif
-
diff --git a/libpolyml/pecoffexport.cpp b/libpolyml/pecoffexport.cpp
index 7198471a..369f772b 100644
--- a/libpolyml/pecoffexport.cpp
+++ b/libpolyml/pecoffexport.cpp
@@ -1,415 +1,427 @@
/*
Title: Export memory as a PE/COFF object
Author: David C. J. Matthews.
- Copyright (c) 2006, 2011, 2016-18 David C. J. Matthews
+ Copyright (c) 2006, 2011, 2016-18, 2020-21 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR H PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#include
#include
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_ASSERT_H
#include <assert.h>
#endif
#include
#include "globals.h"
#include "pecoffexport.h"
#include "machine_dep.h"
#include "scanaddrs.h"
#include "run_time.h"
#include "../polyexports.h"
#include "version.h"
#include "polystring.h"
#include "timing.h"
#ifdef _DEBUG
/* MS C defines _DEBUG for debug builds. */
#define DEBUG
#endif
#ifdef DEBUG
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#if (defined(HOSTARCHITECTURE_X86))
#define DIRECT_WORD_RELOCATION IMAGE_REL_I386_DIR32
#define RELATIVE_32BIT_RELOCATION IMAGE_REL_I386_REL32
#define IMAGE_MACHINE_TYPE IMAGE_FILE_MACHINE_I386
#elif (defined(HOSTARCHITECTURE_X86_64))
#define DIRECT_WORD_RELOCATION IMAGE_REL_AMD64_ADDR64
#define RELATIVE_32BIT_RELOCATION IMAGE_REL_AMD64_REL32
#define IMAGE_MACHINE_TYPE IMAGE_FILE_MACHINE_AMD64
#elif (defined(HOSTARCHITECTURE_AARCH64))
#define DIRECT_WORD_RELOCATION IMAGE_REL_ARM64_ADDR64
#define RELATIVE_32BIT_RELOCATION IMAGE_REL_AMD64_REL32 // Leave for the moment
#define IMAGE_MACHINE_TYPE IMAGE_FILE_MACHINE_ARM64
#else
#error "Unknown architecture: unable to configure exporter for PECOFF"
#endif
void PECOFFExport::writeRelocation(const IMAGE_RELOCATION* reloc)
{
fwrite(reloc, sizeof(*reloc), 1, exportFile);
if (relocationCount == 0)
firstRelocation = *reloc;
relocationCount++;
}
void PECOFFExport::addExternalReference(void *relocAddr, const char *name, bool/* isFuncPtr*/)
{
externTable.makeEntry(name);
IMAGE_RELOCATION reloc;
// Set the offset within the section we're scanning.
setRelocationAddress(relocAddr, &reloc.VirtualAddress);
reloc.SymbolTableIndex = symbolNum++;
reloc.Type = DIRECT_WORD_RELOCATION;
writeRelocation(&reloc);
}
// Generate the address relative to the start of the segment.
void PECOFFExport::setRelocationAddress(void *p, DWORD *reloc)
{
unsigned area = findArea(p);
DWORD offset = (DWORD)((char*)p - (char*)memTable[area].mtOriginalAddr);
*reloc = offset;
}
// Create a relocation entry for an address at a given location.
PolyWord PECOFFExport::createRelocation(PolyWord p, void *relocAddr)
{
IMAGE_RELOCATION reloc;
// Set the offset within the section we're scanning.
setRelocationAddress(relocAddr, &reloc.VirtualAddress);
void *addr = p.AsAddress();
unsigned addrArea = findArea(addr);
POLYUNSIGNED offset = (POLYUNSIGNED)((char*)addr - (char*)memTable[addrArea].mtOriginalAddr);
reloc.SymbolTableIndex = addrArea;
reloc.Type = DIRECT_WORD_RELOCATION;
writeRelocation(&reloc);
return PolyWord::FromUnsigned(offset);
}
#ifdef SYMBOLS_REQUIRE_UNDERSCORE
#define POLY_PREFIX_STRING "_"
#else
#define POLY_PREFIX_STRING ""
#endif
void PECOFFExport::writeSymbol(const char *symbolName, __int32 value, int section, bool isExtern, int symType)
{
// On X86/32 we have to add an underscore to external symbols
TempCString fullSymbol;
fullSymbol = (char*)malloc(strlen(POLY_PREFIX_STRING) + strlen(symbolName) + 1);
if (fullSymbol == 0) throw MemoryException();
sprintf(fullSymbol, "%s%s", POLY_PREFIX_STRING, symbolName);
IMAGE_SYMBOL symbol;
memset(&symbol, 0, sizeof(symbol)); // Zero the unused part of the string
// Short symbol names go in the entry, longer ones go in the string table.
if (strlen(fullSymbol) <= 8)
strcat((char*)symbol.N.ShortName, fullSymbol);
else {
symbol.N.Name.Short = 0;
// We have to add 4 bytes because the first word written to the file is a length word.
symbol.N.Name.Long = stringTable.makeEntry(fullSymbol) + sizeof(unsigned);
}
symbol.Value = value;
symbol.SectionNumber = section;
symbol.Type = symType;
symbol.StorageClass = isExtern ? IMAGE_SYM_CLASS_EXTERNAL : IMAGE_SYM_CLASS_STATIC;
fwrite(&symbol, sizeof(symbol), 1, exportFile);
}
/* This is called for each constant within the code.
Print a relocation entry for the word and return a value that means
that the offset is saved in original word. */
-void PECOFFExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code)
+void PECOFFExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code, intptr_t displacement)
{
#ifndef POLYML32IN64
IMAGE_RELOCATION reloc;
- PolyObject *p = GetConstantValue(addr, code);
+ PolyObject *p = GetConstantValue(addr, code, displacement);
if (p == 0)
return;
void *a = p;
unsigned aArea = findArea(a);
// We don't need a relocation if this is relative to the current segment
// since the relative address will already be right.
if (code == PROCESS_RELOC_I386RELATIVE && aArea == findArea(addr))
return;
setRelocationAddress(addr, &reloc.VirtualAddress);
// Set the value at the address to the offset relative to the symbol.
uintptr_t offset = (char*)a - (char*)memTable[aArea].mtOriginalAddr;
reloc.SymbolTableIndex = aArea;
// The value we store here is the offset whichever relocation method
// we're using.
unsigned maxSize = code == PROCESS_RELOC_I386RELATIVE ? 4: sizeof(PolyWord);
for (unsigned i = 0; i < maxSize; i++)
{
addr[i] = (byte)(offset & 0xff);
offset >>= 8;
}
if (code == PROCESS_RELOC_I386RELATIVE)
reloc.Type = RELATIVE_32BIT_RELOCATION;
else
reloc.Type = DIRECT_WORD_RELOCATION;
writeRelocation(&reloc);
#endif
}
// Set the file alignment.
void PECOFFExport::alignFile(int align)
{
char pad[32] = {0}; // Maximum alignment
int offset = ftell(exportFile);
if ((offset % align) == 0) return;
fwrite(&pad, align - (offset % align), 1, exportFile);
}
void PECOFFExport::exportStore(void)
{
PolyWord *p;
IMAGE_FILE_HEADER fhdr;
IMAGE_SECTION_HEADER *sections = 0;
IMAGE_RELOCATION reloc;
unsigned i;
// These are written out as the description of the data.
exportDescription exports;
time_t now = getBuildTime();
sections = new IMAGE_SECTION_HEADER [memTableEntries+1]; // Plus one for the tables.
// Write out initial values for the headers. These are overwritten at the end.
// File header
memset(&fhdr, 0, sizeof(fhdr));
fhdr.Machine = IMAGE_MACHINE_TYPE; // x86-64
fhdr.NumberOfSections = memTableEntries+1; // One for each area plus one for the tables.
fhdr.TimeDateStamp = (DWORD)now;
//fhdr.NumberOfSymbols = memTableEntries+1; // One for each area plus "poly_exports"
fwrite(&fhdr, sizeof(fhdr), 1, exportFile); // Write it for the moment.
// External symbols are added after the memory table entries and "poly_exports".
symbolNum = memTableEntries+1; // The first external symbol
// Section headers.
for (i = 0; i < memTableEntries; i++)
{
memset(&sections[i], 0, sizeof(IMAGE_SECTION_HEADER));
sections[i].SizeOfRawData = (DWORD)memTable[i].mtLength;
sections[i].Characteristics = IMAGE_SCN_MEM_READ | IMAGE_SCN_ALIGN_8BYTES;
if (memTable[i].mtFlags & MTF_WRITEABLE)
{
// Mutable data
ASSERT(!(memTable[i].mtFlags & MTF_EXECUTABLE)); // Executable areas can't be writable.
strcpy((char*)sections[i].Name, ".data");
sections[i].Characteristics |= IMAGE_SCN_MEM_WRITE | IMAGE_SCN_CNT_INITIALIZED_DATA;
}
#ifndef CODEISNOTEXECUTABLE
// Not if we're building the interpreted version.
else if (memTable[i].mtFlags & MTF_EXECUTABLE)
{
// Immutable data areas are marked as executable.
strcpy((char*)sections[i].Name, ".text");
sections[i].Characteristics |= IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE;
}
#endif
else
{
// Immutable data areas are marked as executable.
strcpy((char*)sections[i].Name, ".rdata");
sections[i].Characteristics |= IMAGE_SCN_CNT_INITIALIZED_DATA;
}
}
// Extra section for the tables.
memset(&sections[memTableEntries], 0, sizeof(IMAGE_SECTION_HEADER));
sprintf((char*)sections[memTableEntries].Name, ".data");
sections[memTableEntries].SizeOfRawData = sizeof(exports) + (memTableEntries+1)*sizeof(memoryTableEntry);
// Don't need write access here but keep it for consistency with other .data sections
sections[memTableEntries].Characteristics =
IMAGE_SCN_MEM_READ | IMAGE_SCN_ALIGN_8BYTES | IMAGE_SCN_MEM_WRITE | IMAGE_SCN_CNT_INITIALIZED_DATA;
fwrite(sections, sizeof(IMAGE_SECTION_HEADER), memTableEntries+1, exportFile); // Write it for the moment.
for (i = 0; i < memTableEntries; i++)
{
sections[i].PointerToRelocations = ftell(exportFile);
relocationCount = 0;
// Create the relocation table and turn all addresses into offsets.
char *start = (char*)memTable[i].mtOriginalAddr;
char *end = start + memTable[i].mtLength;
for (p = (PolyWord*)start; p < (PolyWord*)end; )
{
p++;
PolyObject *obj = (PolyObject*)p;
POLYUNSIGNED length = obj->Length();
- // Update any constants before processing the object
- // We need that for relative jumps/calls in X86/64.
if (length != 0 && obj->IsCodeObject())
+ {
+ POLYUNSIGNED constCount;
+ PolyWord* cp;
+ // Get the constant area pointer first because ScanConstantsWithinCode
+ // may alter it.
+ machineDependent->GetConstSegmentForCode(obj, cp, constCount);
+ // Update any constants before processing the object
+ // We need that for relative jumps/calls in X86/64.
machineDependent->ScanConstantsWithinCode(obj, this);
- relocateObject(obj);
+ if (cp > (PolyWord*)obj && cp < ((PolyWord*)obj) + length)
+ {
+ // Process the constants if they're in the area but not if they've been moved.
+ for (POLYUNSIGNED i = 0; i < constCount; i++) relocateValue(&(cp[i]));
+ }
+ }
+ else relocateObject(obj);
p += length;
}
// If there are more than 64k relocations set this bit and set the value to 64k-1.
if (relocationCount >= 65535) {
// We're going to overwrite the first relocation so we have to write the
// copy we saved here.
writeRelocation(&firstRelocation); // Increments relocationCount
sections[i].NumberOfRelocations = 65535;
sections[i].Characteristics |= IMAGE_SCN_LNK_NRELOC_OVFL;
// We have to go back and patch up the first (dummy) relocation entry
// which contains the count.
fseek(exportFile, sections[i].PointerToRelocations, SEEK_SET);
memset(&reloc, 0, sizeof(reloc));
reloc.RelocCount = relocationCount;
fwrite(&reloc, sizeof(reloc), 1, exportFile);
fseek(exportFile, 0, SEEK_END); // Return to the end of the file.
}
else sections[i].NumberOfRelocations = relocationCount;
}
// We don't need to handle relocation overflow here.
sections[memTableEntries].PointerToRelocations = ftell(exportFile);
relocationCount = 0;
// Relocations for "exports" and "memTable";
// Address of "memTable" within "exports". We can't use createRelocation because
// the position of the relocation is not in either the mutable or the immutable area.
reloc.Type = DIRECT_WORD_RELOCATION;
reloc.SymbolTableIndex = memTableEntries; // Relative to poly_exports
reloc.VirtualAddress = offsetof(exportDescription, memTable);
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
// Address of "rootFunction" within "exports"
reloc.Type = DIRECT_WORD_RELOCATION;
unsigned rootAddrArea = findArea(rootFunction);
reloc.SymbolTableIndex = rootAddrArea;
reloc.VirtualAddress = offsetof(exportDescription, rootFunction);
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
for (i = 0; i < memTableEntries; i++)
{
reloc.Type = DIRECT_WORD_RELOCATION;
reloc.SymbolTableIndex = i; // Relative to base symbol
reloc.VirtualAddress =
sizeof(exportDescription) + i * sizeof(memoryTableEntry) + offsetof(memoryTableEntry, mtCurrentAddr);
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
}
ASSERT(relocationCount < 65535); // Shouldn't get overflow!!
sections[memTableEntries].NumberOfRelocations = relocationCount;
// Now the binary data.
for (i = 0; i < memTableEntries; i++)
{
sections[i].PointerToRawData = ftell(exportFile);
fwrite(memTable[i].mtOriginalAddr, 1, memTable[i].mtLength, exportFile);
}
sections[memTableEntries].PointerToRawData = ftell(exportFile);
memset(&exports, 0, sizeof(exports));
exports.structLength = sizeof(exportDescription);
exports.memTableSize = sizeof(memoryTableEntry);
exports.memTableEntries = memTableEntries;
exports.memTable = (memoryTableEntry *)sizeof(exports); // It follows immediately after this.
exports.rootFunction = (void*)((char*)rootFunction - (char*)memTable[rootAddrArea].mtOriginalAddr);
exports.timeStamp = now;
exports.architecture = machineDependent->MachineArchitecture();
exports.rtsVersion = POLY_version_number;
#ifdef POLYML32IN64
exports.originalBaseAddr = globalHeapBase;
#else
exports.originalBaseAddr = 0;
#endif
// Set the address values to zero before we write. They will always
// be relative to their base symbol.
for (i = 0; i < memTableEntries; i++)
memTable[i].mtCurrentAddr = 0;
fwrite(&exports, sizeof(exports), 1, exportFile);
fwrite(memTable, sizeof(memoryTableEntry), memTableEntries, exportFile);
// First the symbol table. We have one entry for the exports and an additional
// entry for each of the sections.
fhdr.PointerToSymbolTable = ftell(exportFile);
// The section numbers are one-based. Zero indicates the "common" area.
// First write symbols for each section and for poly_exports.
for (i = 0; i < memTableEntries; i++)
{
char buff[50];
sprintf(buff, "area%0d", i);
writeSymbol(buff, 0, i+1, false);
}
// Exported symbol for table.
writeSymbol("poly_exports", 0, memTableEntries+1, true);
// External references.
for (unsigned i = 0; i < externTable.stringSize; i += (unsigned)strlen(externTable.strings+i) + 1)
writeSymbol(externTable.strings+i, 0, 0, true, 0x20);
fhdr.NumberOfSymbols = symbolNum;
// The string table is written immediately after the symbols.
// The length is included as the first word.
unsigned strSize = stringTable.stringSize + sizeof(unsigned);
fwrite(&strSize, sizeof(strSize), 1, exportFile);
fwrite(stringTable.strings, stringTable.stringSize, 1, exportFile);
// Rewind to rewrite the headers.
fseek(exportFile, 0, SEEK_SET);
fwrite(&fhdr, sizeof(fhdr), 1, exportFile);
fwrite(sections, sizeof(IMAGE_SECTION_HEADER), memTableEntries+1, exportFile);
fclose(exportFile); exportFile = NULL;
delete[](sections);
}
diff --git a/libpolyml/pecoffexport.h b/libpolyml/pecoffexport.h
index adb15ddc..8587a180 100644
--- a/libpolyml/pecoffexport.h
+++ b/libpolyml/pecoffexport.h
@@ -1,66 +1,70 @@
/*
Title: Export memory as a PE/COFF object
Author: David C. J. Matthews.
- Copyright (c) 2006, 2013, 2016 David C. J. Matthews
+ Copyright (c) 2006, 2013, 2016, 2020 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef PECOFFEXPORT_H_INCLUDED
#define PECOFFEXPORT_H_INCLUDED
#include "scanaddrs.h" // For base class
#include "exporter.h"
#include
#include
class PECOFFExport: public Exporter, public ScanAddress
{
public:
PECOFFExport(): relocationCount(0), symbolNum(0) {}
public:
virtual void exportStore(void);
private:
// ScanAddress overrides
- virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code);
+ virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code, intptr_t displacement);
// At the moment we should only get calls to ScanConstant.
virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
void alignFile(int align);
virtual void addExternalReference(void *addr, const char *name, bool isFuncPtr);
+ virtual void RelocateOnly(PolyObject* base, byte* addressOfConstant, ScanRelocationKind code)
+ {
+ ScanConstant(base, addressOfConstant, code, 0);
+ }
private:
void setRelocationAddress(void *p, DWORD *reloc);
PolyWord createRelocation(PolyWord p, void *relocAddr);
void writeSymbol(const char *symbolName, __int32 value, int section, bool isExtern, int symType=0);
void writeRelocation(const IMAGE_RELOCATION* reloc);
unsigned relocationCount;
ExportStringTable stringTable;
// Table and count for external references.
ExportStringTable externTable;
unsigned symbolNum;
// Copy of the first relocation in case we
// have to overwrite it.
IMAGE_RELOCATION firstRelocation;
};
#endif
diff --git a/libpolyml/pexport.cpp b/libpolyml/pexport.cpp
index 4158ef3e..b33e1812 100644
--- a/libpolyml/pexport.cpp
+++ b/libpolyml/pexport.cpp
@@ -1,904 +1,904 @@
/*
Title: Export and import memory in a portable format
Author: David C. J. Matthews.
Copyright (c) 2006-7, 2015-8, 2020-21 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_ASSERT_H
#include <assert.h>
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#include "globals.h"
#include "pexport.h"
#include "machine_dep.h"
#include "scanaddrs.h"
#include "run_time.h"
#include "../polyexports.h"
#include "version.h"
#include "sys.h"
#include "polystring.h"
#include "memmgr.h"
#include "rtsentry.h"
#include "mpoly.h" // For polyStderr
/*
This file contains the code both to export the file and to import it
in a new session.
*/
PExport::PExport()
{
}
PExport::~PExport()
{
}
// Get the index corresponding to an address.
size_t PExport::getIndex(PolyObject *p)
{
// Binary chop to find the index from the address.
size_t lower = 0, upper = pMap.size();
while (1)
{
ASSERT(lower < upper);
size_t middle = (lower+upper)/2;
ASSERT(middle < pMap.size());
if (p < pMap[middle])
{
// Use lower to middle
upper = middle;
}
else if (p > pMap[middle])
{
// Use middle+1 to upper
lower = middle+1;
}
else // Found it
return middle;
}
}
/* Get the index corresponding to an address. */
void PExport::printAddress(void *p)
{
fprintf(exportFile, "@%" PRI_SIZET "", getIndex((PolyObject*)p));
}
void PExport::printValue(PolyWord q)
{
if (IS_INT(q) || q == PolyWord::FromUnsigned(0))
fprintf(exportFile, "%" POLYSFMT, UNTAGGED(q));
else
printAddress(q.AsAddress());
}
void PExport::printObject(PolyObject *p)
{
POLYUNSIGNED length = p->Length();
POLYUNSIGNED i;
size_t myIndex = getIndex(p);
fprintf(exportFile, "%" PRI_SIZET ":", myIndex);
if (p->IsMutable())
putc('M', exportFile);
if (OBJ_IS_NEGATIVE(p->LengthWord()))
putc('N', exportFile);
if (OBJ_IS_WEAKREF_OBJECT(p->LengthWord()))
putc('W', exportFile);
if (OBJ_IS_NO_OVERWRITE(p->LengthWord()))
putc('V', exportFile);
if (p->IsByteObject())
{
if (p->IsMutable() && p->IsWeakRefObject() && p->Length() >= sizeof(uintptr_t) / sizeof(PolyWord))
{
// This is either an entry point or a weak ref used in the FFI.
// Clear the first word
if (p->Length() == sizeof(uintptr_t)/sizeof(PolyWord))
putc('K', exportFile); // Weak ref
else if (p->Length() > sizeof(uintptr_t) / sizeof(PolyWord))
{
// Entry point - C null-terminated string.
putc('E', exportFile);
const char* name = (char*)p + sizeof(uintptr_t);
fprintf(exportFile, "%" PRI_SIZET "|%s", strlen(name), name);
*(uintptr_t*)p = 0; // Entry point
}
}
else
{
/* May be a string, a long format arbitrary precision
number or a real number. */
PolyStringObject* ps = (PolyStringObject*)p;
/* This is not infallible but it seems to be good enough
to detect the strings. */
POLYUNSIGNED bytes = length * sizeof(PolyWord);
if (length >= 2 &&
ps->length <= bytes - sizeof(POLYUNSIGNED) &&
ps->length > bytes - 2 * sizeof(POLYUNSIGNED))
{
/* Looks like a string. */
fprintf(exportFile, "S%" POLYUFMT "|", ps->length);
for (unsigned i = 0; i < ps->length; i++)
{
char ch = ps->chars[i];
fprintf(exportFile, "%02x", ch & 0xff);
}
}
else
{
/* Not a string. May be an arbitrary precision integer.
If the source and destination word lengths differ we
could find that some long-format arbitrary precision
numbers could be represented in the tagged short form
or vice-versa. The former case might give rise to
errors because when comparing two arbitrary precision
numbers for equality we assume that they are not equal
if they have different representation. The latter
case could be a problem because we wouldn't know whether
to convert the tagged form to long form, which would be
correct if the value has type "int" or to truncate it
which would be correct for "word".
It could also be a real number but that doesn't matter
if we recompile everything on the new machine.
*/
byte* u = (byte*)p;
putc('B', exportFile);
fprintf(exportFile, "%" PRI_SIZET "|", length * sizeof(PolyWord));
for (unsigned i = 0; i < (unsigned)(length * sizeof(PolyWord)); i++)
{
fprintf(exportFile, "%02x", u[i]);
}
}
}
}
else if (p->IsCodeObject())
{
POLYUNSIGNED constCount;
PolyWord *cp;
ASSERT(! p->IsMutable() );
/* Work out the number of bytes in the code and the
number of constants. */
- p->GetConstSegmentForCode(cp, constCount);
+ machineDependent->GetConstSegmentForCode(p, cp, constCount);
/* The byte count is the length of the segment minus the
number of constants minus one for the constant count.
It includes the marker word, byte count, profile count
and, on the X86/64 at least, any non-address constants.
These are actually word values. */
POLYUNSIGNED byteCount = (length - constCount - 2) * sizeof(PolyWord);
fprintf(exportFile, "F%" POLYUFMT ",%" POLYUFMT "|", constCount, byteCount);
// First the code.
byte *u = (byte*)p;
for (POLYUNSIGNED i = 0; i < byteCount; i++)
fprintf(exportFile, "%02x", u[i]);
putc('|', exportFile);
// Now the constants.
for (POLYUNSIGNED i = 0; i < constCount; i++)
{
printValue(cp[i]);
if (i < constCount-1)
putc(',', exportFile);
}
putc('|', exportFile);
// Finally any constants in the code object.
machineDependent->ScanConstantsWithinCode(p, this);
}
else // Ordinary objects, essentially tuples, or closures.
{
if (p->IsClosureObject())
{
POLYUNSIGNED nItems = length - sizeof(PolyObject*) / sizeof(PolyWord) + 1;
fprintf(exportFile, "C%" POLYUFMT "|", nItems); // Number of items
}
else fprintf(exportFile, "O%" POLYUFMT "|", length);
if (p->IsClosureObject())
{
// The first word is always a code address.
printAddress(*(PolyObject**)p);
i = sizeof(PolyObject*)/sizeof(PolyWord);
if (i < length)
putc(',', exportFile);
}
else i = 0;
while (i < length)
{
printValue(p->Get(i));
if (i < length-1)
putc(',', exportFile);
i++;
}
}
fprintf(exportFile, "\n");
}
/* This is called for each constant within the code.
Print a relocation entry for the word and return a value that means
that the offset is saved in original word. */
-void PExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code)
+void PExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code, intptr_t displacement)
{
- PolyObject *p = GetConstantValue(addr, code);
+ PolyObject *p = GetConstantValue(addr, code, displacement);
if (p == 0) return; // Don't put in tagged constants
// Put in the byte offset and the relocation type code.
POLYUNSIGNED offset = (POLYUNSIGNED)(addr - (byte*)base);
ASSERT (offset < base->Length() * sizeof(POLYUNSIGNED));
fprintf(exportFile, "%" POLYUFMT ",%d,", (POLYUNSIGNED)(addr - (byte*)base), code);
printAddress(p); // The value to plug in.
fprintf(exportFile, " ");
}
void PExport::exportStore(void)
{
// We want the entries in pMap to be in ascending
// order of address to make searching easy so we need to process the areas
// in order of increasing address, which may not be the order in memTable.
std::vector<size_t> indexOrder;
indexOrder.reserve(memTableEntries);
for (size_t i = 0; i < memTableEntries; i++)
{
std::vector<size_t>::iterator it;
for (it = indexOrder.begin(); it != indexOrder.end(); it++) {
if (memTable[*it].mtOriginalAddr >= memTable[i].mtOriginalAddr)
break;
}
indexOrder.insert(it, i);
}
// Process the area in order of ascending address.
for (std::vector<size_t>::iterator i = indexOrder.begin(); i != indexOrder.end(); i++)
{
size_t index = *i;
char *start = (char*)memTable[index].mtOriginalAddr;
char *end = start + memTable[index].mtLength;
for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; )
{
p++;
PolyObject *obj = (PolyObject*)p;
POLYUNSIGNED length = obj->Length();
pMap.push_back(obj);
p += length;
}
}
/* Start writing the information. */
fprintf(exportFile, "Objects\t%" PRI_SIZET "\n", pMap.size());
char arch = '?';
switch (machineDependent->MachineArchitecture())
{
case MA_Interpreted:
arch = 'I'; break;
case MA_I386: case MA_X86_64: case MA_X86_64_32:
arch = 'X'; break;
case MA_Arm64: case MA_Arm64_32:
arch = 'A'; break;
}
fprintf(exportFile, "Root\t%" PRI_SIZET " %c %u\n", getIndex(rootFunction), arch, (unsigned)sizeof(PolyWord));
// Generate each of the areas.
for (size_t i = 0; i < memTableEntries; i++)
{
char *start = (char*)memTable[i].mtOriginalAddr;
char *end = start + memTable[i].mtLength;
for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; )
{
p++;
PolyObject *obj = (PolyObject*)p;
POLYUNSIGNED length = obj->Length();
#ifdef POLYML32IN64
// We may have filler cells to get the alignment right.
// We mustn't try to print them.
if (((uintptr_t)obj & 4) != 0 && length == 0)
continue;
#endif
printObject(obj);
p += length;
}
}
fclose(exportFile); exportFile = NULL;
}
/*
Import a portable export file and load it into memory.
Creates "permanent" address entries in the global memory table.
*/
// Bump allocator over permanent memory spaces used during import.
// Each instance allocates objects of one kind (mutable, immutable or
// code), creating a fresh PermanentMemSpace whenever the current one
// is exhausted.
class SpaceAlloc
{
public:
SpaceAlloc(unsigned *indexCtr, unsigned perms, POLYUNSIGNED def);
PolyObject *NewObj(POLYUNSIGNED objWords);
size_t defaultSize;             // Default size, in words, of a newly created space.
PermanentMemSpace *memSpace;    // Space currently being filled; 0 until first allocation.
size_t used;                    // Words already consumed in memSpace.
unsigned permissions;           // MTF_* flags applied to every space this allocator creates.
unsigned *spaceIndexCtr;        // Shared counter giving each new space a unique index.
};
// Record the allocation parameters; no space is created until the
// first call to NewObj.
SpaceAlloc::SpaceAlloc(unsigned *indexCtr, unsigned perms, POLYUNSIGNED def)
    : defaultSize(def),
      memSpace(0),
      used(0),
      permissions(perms),
      spaceIndexCtr(indexCtr)
{
}
// Allocate a new object. May create a new space and add the old one to the permanent
// memory table if this is exhausted.
#ifndef POLYML32IN64
// Returns the address of the object cell (just past its length word),
// or 0 if a new permanent space could not be allocated.
PolyObject *SpaceAlloc::NewObj(POLYUNSIGNED objWords)
{
// An object needs objWords+1 words including its length word, so the
// space is exhausted unless strictly more than objWords words remain.
if (memSpace == 0 || memSpace->spaceSize() - used <= objWords)
{
// Need some more space.
size_t size = defaultSize;
if (size <= objWords)
size = objWords+1;
memSpace =
gMem.AllocateNewPermanentSpace(size * sizeof(PolyWord), permissions, *spaceIndexCtr);
(*spaceIndexCtr)++;
// The memory is writable until CompletePermanentSpaceAllocation is called
if (memSpace == 0)
{
fprintf(polyStderr, "Unable to allocate memory\n");
return 0;
}
used = 0;
}
ASSERT(memSpace->spaceSize() - used > objWords);
// Skip one word for the length word; the caller sets it via SetLengthWord.
PolyObject *newObj = (PolyObject*)(memSpace->bottom + used+1);
used += objWords+1;
return newObj;
}
#else
// With 32in64 we need to allocate on 8-byte boundaries.
// Returns the address of the object cell (just past its length word),
// or 0 if a new permanent space could not be allocated.
PolyObject *SpaceAlloc::NewObj(POLYUNSIGNED objWords)
{
    // Round up so the cell plus its length word occupies an even number
    // of 32-bit words, keeping every object 8-byte aligned.
    size_t rounded = objWords;
    if ((objWords & 1) == 0) rounded++;
    if (memSpace == 0 || memSpace->spaceSize() - used <= rounded)
    {
        // Need some more space.
        size_t size = defaultSize;
        if (size <= rounded)
            size = rounded + 1;
        memSpace =
            gMem.AllocateNewPermanentSpace(size * sizeof(PolyWord), permissions, *spaceIndexCtr);
        (*spaceIndexCtr)++;
        // The memory is writable until CompletePermanentSpaceAllocation is called
        if (memSpace == 0)
        {
            // Fix: report on polyStderr for consistency with the
            // non-32in64 variant above and the rest of this file.
            fprintf(polyStderr, "Unable to allocate memory\n");
            return 0;
        }
        // Dummy first word so the first real object is correctly aligned.
        memSpace->writeAble(memSpace->bottom)[0] = PolyWord::FromUnsigned(0);
        used = 1;
    }
    PolyObject *newObj = (PolyObject*)(memSpace->bottom + used + 1);
    // If we rounded up, zero the padding word following the object.
    if (rounded != objWords) memSpace->writeAble(newObj)->Set(objWords, PolyWord::FromUnsigned(0));
    used += rounded + 1;
    ASSERT(((uintptr_t)newObj & 0x7) == 0);
    return newObj;
}
#endif
// Reads a portable export file and rebuilds the heap from it.
// DoImport makes two passes over the file: the first allocates every
// object, the second fills in contents and patches addresses.
class PImport
{
public:
PImport();
~PImport();
bool DoImport(void);
FILE *f;                        // The export file; owned and closed by the destructor.
PolyObject *Root(void) { return objMap[nRoot]; }
private:
bool ReadValue(PolyObject *p, POLYUNSIGNED i);
bool GetValue(PolyWord *result);
POLYUNSIGNED nObjects, nRoot;   // Object count and index of the root object.
PolyObject **objMap;            // Maps object number in the file to its allocated cell.
unsigned spaceIndex;            // Next permanent-space index, shared by the three allocators.
SpaceAlloc mutSpace, immutSpace, codeSpace;
};
// Set up one allocator per object kind; all three share spaceIndex so
// every permanent space gets a distinct index.
PImport::PImport():
    mutSpace(&spaceIndex, MTF_WRITEABLE, 1024*1024),
    immutSpace(&spaceIndex, 0, 1024*1024),
    codeSpace(&spaceIndex, MTF_EXECUTABLE, 1024 * 1024)
{
    // Space indexes start at 1; 0 is reserved.
    spaceIndex = 1;
    objMap = 0;
    f = NULL;
}
// Release the object map and close the input file if it was opened.
PImport::~PImport()
{
    free(objMap);
    if (f != NULL)
        fclose(f);
}
// Parse one value from the stream: either "@N" (the address of object
// number N, resolved through objMap) or a signed decimal tagged integer.
// Returns false on any other input.
bool PImport::GetValue(PolyWord *result)
{
int ch = getc(f);
if (ch == '@')
{
/* Address of an object. */
POLYUNSIGNED obj;
fscanf(f, "%" POLYUFMT, &obj);
ASSERT(obj < nObjects);
// Pass 1 must already have allocated this object, even if it occurs
// later in the file.
*result = objMap[obj];
}
else if ((ch >= '0' && ch <= '9') || ch == '-')
{
/* Tagged integer. */
POLYSIGNED j;
ungetc(ch, f);
fscanf(f, "%" POLYSFMT, &j);
/* The assertion may be false if we are porting to a machine
with a shorter tagged representation. */
ASSERT(j >= -MAXTAGGED-1 && j <= MAXTAGGED);
*result = TAGGED(j);
}
else
{
fprintf(polyStderr, "Unexpected character in stream");
return false;
}
return true;
}
/* Read a value and store it at the specified word. */
bool PImport::ReadValue(PolyObject *p, POLYUNSIGNED i)
{
    // Only update the cell if the value parsed cleanly.
    PolyWord parsed = TAGGED(0);
    if (!GetValue(&parsed))
        return false;
    p->Set(i, parsed);
    return true;
}
// Load a portable export file. Pass 1 reads only the object headers,
// allocating a cell for each object and recording it in objMap so that
// forward references can be resolved. Pass 2 rewinds the file and fills
// in the contents, patching addresses through objMap.
// Returns false on a malformed file or allocation failure.
bool PImport::DoImport()
{
int ch;
POLYUNSIGNED objNo;
// The heap must be empty: the import creates all permanent spaces itself.
ASSERT(gMem.pSpaces.size() == 0);
ASSERT(gMem.eSpaces.size() == 0);
ch = getc(f);
ASSERT(ch == 'O'); /* Number of objects. */
while (getc(f) != '\t') ;
fscanf(f, "%" POLYUFMT, &nObjects);
/* Create a mapping table. */
objMap = (PolyObject**)calloc(nObjects, sizeof(PolyObject*));
if (objMap == 0)
{
fprintf(polyStderr, "Unable to allocate memory\n");
return false;
}
do
{
ch = getc(f);
} while (ch == '\n');
ASSERT(ch == 'R'); /* Root object number. */
while (getc(f) != '\t') ;
fscanf(f, "%" POLYUFMT, &nRoot);
do { ch = getc(f); } while (ch == ' ' || ch == '\t');
// Older versions did not have the architecture and word length.
if (ch != '\r' && ch != '\n')
{
unsigned wordLength;
while (ch == ' ' || ch == '\t') ch = getc(f);
char arch = ch;
ch = getc(f);
fscanf(f, "%u", &wordLength);
// If we're booting a native code version from interpreted
// code we have to interpret.
machineDependent->SetBootArchitecture(arch, wordLength);
}
/* Now the objects themselves. */
// Pass 1: read each object's header line, compute its size in words and
// allocate it from the appropriate allocator.
while (1)
{
unsigned objBits = 0;
POLYUNSIGNED nWords, nBytes;
do
{
ch = getc(f);
} while (ch == '\r' || ch == '\n');
if (ch == EOF) break;
ungetc(ch, f);
fscanf(f, "%" POLYUFMT, &objNo);
ch = getc(f);
ASSERT(ch == ':');
ASSERT(objNo < nObjects);
/* Modifiers, MNVW. */
do
{
ch = getc(f);
if (ch == 'M') objBits |= F_MUTABLE_BIT;
else if (ch == 'N') objBits |= F_NEGATIVE_BIT;
if (ch == 'V') objBits |= F_NO_OVERWRITE;
if (ch == 'W') objBits |= F_WEAK_BIT;
} while (ch == 'M' || ch == 'N' || ch == 'V' || ch == 'W');
/* Object type. */
// Each case computes nWords, the cell size in words excluding the
// length word.
switch (ch)
{
case 'O': /* Simple object. */
fscanf(f, "%" POLYUFMT, &nWords);
break;
case 'B': /* Byte segment. */
objBits |= F_BYTE_OBJ;
fscanf(f, "%" POLYUFMT, &nBytes);
/* Round up to appropriate number of words. */
nWords = (nBytes + sizeof(PolyWord) -1) / sizeof(PolyWord);
break;
case 'S': /* String. */
objBits |= F_BYTE_OBJ;
/* The length is the number of characters. */
fscanf(f, "%" POLYUFMT, &nBytes);
/* Round up to appropriate number of words. Need to add
one PolyWord for the length PolyWord. */
nWords = (nBytes + sizeof(PolyWord) -1) / sizeof(PolyWord) + 1;
break;
case 'F':
objBits |= F_CODE_OBJ;
/* Read the number of bytes of code and the number of words
for constants. */
fscanf(f, "%" POLYUFMT ",%" POLYUFMT, &nWords, &nBytes);
nWords += 2; // Add two words for no of consts + offset.
/* Add in the size of the code itself. */
nWords += (nBytes + sizeof(PolyWord) -1) / sizeof(PolyWord);
break;
case 'C': // Closure
objBits |= F_CLOSURE_OBJ;
fscanf(f, "%" POLYUFMT, &nWords); // This is the number of items.
nWords += sizeof(PolyObject*) / sizeof(PolyWord) - 1;
break;
case 'L': // Legacy closure
objBits |= F_CLOSURE_OBJ;
fscanf(f, "%" POLYUFMT, &nWords); // This was the number of words.
break;
case 'K': // Single weak reference
nWords = sizeof(uintptr_t)/sizeof(PolyWord);
objBits |= F_BYTE_OBJ;
break;
case 'E': // Entry point - address followed by string
objBits |= F_BYTE_OBJ;
// The length is the length of the string but it must be null-terminated
fscanf(f, "%" POLYUFMT, &nBytes);
// Add one uintptr_t plus one plus padding to an integral number of words.
nWords = (nBytes + sizeof(uintptr_t) + sizeof(PolyWord)) / sizeof(PolyWord);
break;
default:
fprintf(polyStderr, "Invalid object type\n");
return false;
}
// Choose the allocator: mutable objects, code objects and everything
// else go into separate permanent spaces.
SpaceAlloc* alloc;
if (objBits & F_MUTABLE_BIT)
alloc = &mutSpace;
else if ((objBits & 3) == F_CODE_OBJ)
alloc = &codeSpace;
else alloc = &immutSpace;
PolyObject* p = alloc->NewObj(nWords);
if (p == 0)
return false;
objMap[objNo] = p;
/* Put in length PolyWord and flag bits. */
alloc->memSpace->writeAble(p)->SetLengthWord(nWords, objBits);
/* Skip the object contents. */
while (getc(f) != '\n') ;
}
/* Second pass - fill in the contents. */
fseek(f, 0, SEEK_SET);
/* Skip the information at the start. */
ch = getc(f);
ASSERT(ch == 'O'); /* Number of objects. */
while (getc(f) != '\n');
ch = getc(f);
ASSERT(ch == 'R'); /* Root object number. */
while (getc(f) != '\n') ;
// Pass 2: re-parse each object, this time reading the contents into
// the cell allocated in pass 1.
while (1)
{
if (feof(f))
break;
fscanf(f, "%" POLYUFMT, &objNo);
if (feof(f))
break;
ch = getc(f);
ASSERT(ch == ':');
ASSERT(objNo < nObjects);
PolyObject * p = objMap[objNo];
/* Modifiers, M or N. */
// The flag bits were set in pass 1; here they are simply skipped.
do
{
ch = getc(f);
} while (ch == 'M' || ch == 'N' || ch == 'V' || ch == 'W');
/* Object type. */
switch (ch)
{
case 'O': /* Simple object. */
case 'C': // Closure
case 'L': // Legacy closure
{
POLYUNSIGNED nWords;
bool isClosure = ch == 'C' || ch == 'L';
fscanf(f, "%" POLYUFMT, &nWords);
if (ch == 'C') nWords += sizeof(PolyObject*) / sizeof(PolyWord) - 1;
ch = getc(f);
ASSERT(ch == '|');
ASSERT(nWords == p->Length());
POLYUNSIGNED i = 0;
if (isClosure)
{
// The first field of a closure is the code address, stored as
// a native pointer rather than a PolyWord.
int ch = getc(f);
// This should be an address
if (ch != '@') return false;
POLYUNSIGNED obj;
fscanf(f, "%" POLYUFMT, &obj);
ASSERT(obj < nObjects);
*(PolyObject**)p = objMap[obj];
ch = getc(f);
i = sizeof(PolyObject*) / sizeof(PolyWord);
}
while (i < nWords)
{
if (!ReadValue(p, i))
return false;
ch = getc(f);
ASSERT((ch == ',' && i < nWords - 1) ||
(ch == '\n' && i == nWords - 1));
i++;
}
break;
}
case 'B': /* Byte segment. */
{
byte *u = (byte*)p;
POLYUNSIGNED nBytes;
fscanf(f, "%" POLYUFMT, &nBytes);
ch = getc(f); ASSERT(ch == '|');
// Contents are pairs of hex digits, one byte each.
for (POLYUNSIGNED i = 0; i < nBytes; i++)
{
int n;
fscanf(f, "%02x", &n);
u[i] = n;
}
ch = getc(f);
ASSERT(ch == '\n');
// Legacy: If this is an entry point object set its value.
if (p->IsMutable() && p->IsWeakRefObject() && p->Length() > sizeof(uintptr_t)/sizeof(PolyWord))
{
bool loadEntryPt = setEntryPoint(p);
ASSERT(loadEntryPt);
}
break;
}
case 'S': /* String. */
{
PolyStringObject * ps = (PolyStringObject *)p;
/* The length is the number of characters. */
POLYUNSIGNED nBytes;
fscanf(f, "%" POLYUFMT, &nBytes);
ch = getc(f); ASSERT(ch == '|');
ps->length = nBytes;
for (POLYUNSIGNED i = 0; i < nBytes; i++)
{
int n;
fscanf(f, "%02x", &n);
ps->chars[i] = n;
}
ch = getc(f);
ASSERT(ch == '\n');
break;
}
case 'D':
case 'F':
{
// Code object: hex code bytes, then the constant area, then optional
// constants embedded within the code itself. 'D' is the old layout,
// 'F' the new one with an extra offset word at the end.
bool newForm = ch == 'F';
POLYUNSIGNED length = p->Length();
POLYUNSIGNED nWords, nBytes;
// All stores must go through the writable mapping of the code area.
MemSpace* space = gMem.SpaceForObjectAddress(p);
PolyObject *wr = space->writeAble(p);
byte* u = (byte*)wr;
/* Read the number of bytes of code and the number of words
for constants. */
fscanf(f, "%" POLYUFMT ",%" POLYUFMT, &nWords, &nBytes);
/* Read the code. */
ch = getc(f); ASSERT(ch == '|');
for (POLYUNSIGNED i = 0; i < nBytes; i++)
{
int n;
fscanf(f, "%02x", &n);
u[i] = n;
}
ch = getc(f);
ASSERT(ch == '|');
if (newForm)
{
wr->Set(length - nWords - 2, PolyWord::FromUnsigned(nWords));
wr->Set(length - 1, PolyWord::FromSigned((0-nWords-1)*sizeof(PolyWord)));
}
else wr->Set(length-1, PolyWord::FromUnsigned(nWords));
/* Read in the constants. */
for (POLYUNSIGNED i = 0; i < nWords; i++)
{
if (! ReadValue(wr, i+length-nWords-1))
return false;
ch = getc(f);
ASSERT((ch == ',' && i < nWords-1) ||
((ch == '\n' || ch == '|') && i == nWords-1));
}
// Read in any constants in the code.
if (ch == '|')
{
ch = getc(f);
while (ch != '\n')
{
ungetc(ch, f);
POLYUNSIGNED offset;
int code;
fscanf(f, "%" POLYUFMT ",%d", &offset, &code);
ch = getc(f);
ASSERT(ch == ',');
// This should be an address.
ch = getc(f);
if (ch == '@')
{
POLYUNSIGNED obj;
fscanf(f, "%" POLYUFMT, &obj);
ASSERT(obj < nObjects);
PolyObject *addr = objMap[obj];
byte *toPatch = (byte*)p + offset; // Pass the execute address here.
ScanAddress::SetConstantValue(toPatch, addr, (ScanRelocationKind)code);
}
else
{
// Previously we also included tagged constants but they are
// already in the code.
ungetc(ch, f);
PolyWord w;
if (!GetValue(&w))
return false;
}
do ch = getc(f); while (ch == ' ');
}
}
// Clear the mutable bit
wr->SetLengthWord(p->Length(), F_CODE_OBJ);
break;
}
case 'K':
// Weak reference - must be zeroed
*(uintptr_t*)p = 0;
break;
case 'E':
// Entry point - address followed by string
{
// The length is the number of characters.
*(uintptr_t*)p = 0;
char* b = (char*)p + sizeof(uintptr_t);
POLYUNSIGNED nBytes;
fscanf(f, "%" POLYUFMT, &nBytes);
ch = getc(f); ASSERT(ch == '|');
for (POLYUNSIGNED i = 0; i < nBytes; i++)
{
ch = getc(f);
*b++ = ch;
}
*b = 0;
ch = getc(f);
ASSERT(ch == '\n');
// Resolve the named entry point to its address in this RTS.
bool loadEntryPt = setEntryPoint(p);
ASSERT(loadEntryPt);
break;
}
default:
fprintf(polyStderr, "Invalid object type\n");
return false;
}
}
// Now remove write access from immutable spaces.
// NOTE(review): the iterator's template arguments appear to have been
// stripped in this copy — confirm against the upstream source.
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
gMem.CompletePermanentSpaceAllocation(*i);
return true;
}
// Import a file in the portable format and return a pointer to the root object.
// Returns 0 if the file cannot be opened or the import fails; the
// PImport destructor closes the file and frees the object map.
PolyObject *ImportPortable(const TCHAR *fileName)
{
PImport pImport;
#if (defined(_WIN32) && defined(UNICODE))
// Wide-character open on Unicode Windows builds.
pImport.f = _wfopen(fileName, L"r");
if (pImport.f == 0)
{
fprintf(polyStderr, "Unable to open file: %S\n", fileName);
return 0;
}
#else
pImport.f = fopen(fileName, "r");
if (pImport.f == 0)
{
fprintf(polyStderr, "Unable to open file: %s\n", fileName);
return 0;
}
#endif
if (pImport.DoImport())
return pImport.Root();
else
return 0;
}
diff --git a/libpolyml/pexport.h b/libpolyml/pexport.h
index dbd57682..3a214fa7 100644
--- a/libpolyml/pexport.h
+++ b/libpolyml/pexport.h
@@ -1,67 +1,67 @@
/*
Title: Export memory in a portable format
Author: David C. J. Matthews.
- Copyright (c) 2006, 2015, 2017 David C. J. Matthews
+ Copyright (c) 2006, 2015, 2017, 2020 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef PEXPORT_H_INCLUDED
#define PEXPORT_H_INCLUDED
#ifdef HAVE_TCHAR_H
#include
#else
typedef char TCHAR;
#endif
#include
#include "scanaddrs.h" // For base class
#include "exporter.h"
#include "globals.h"
// Exports the heap in the textual "portable" format: exportStore walks
// the memory areas, assigns each object an index and writes one line
// per object. The ScanConstant override emits code constants.
class PExport: public Exporter, public ScanAddress
{
public:
PExport();
virtual ~PExport();
public:
virtual void exportStore(void);
private:
// ScanAddress overrides
- virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code);
+ virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code, intptr_t displacement);
// At the moment we should only get calls to ScanConstant.
virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
private:
size_t getIndex(PolyObject *p);
void printAddress(void *p);
void printValue(PolyWord q);
void printObject(PolyObject *p);
// We don't use the relocation code so just provide a dummy function here.
virtual PolyWord createRelocation(PolyWord p, void *relocAddr) { return p; }
// Address-ordered list of every object, built by exportStore.
// NOTE(review): the element type appears to have been stripped in this
// copy (presumably std::vector<PolyObject*>) — confirm upstream.
std::vector pMap;
};
// Import a file in the portable format and return a pointer to the root object.
PolyObject *ImportPortable(const TCHAR *fileName);
#endif
diff --git a/libpolyml/poly_specific.cpp b/libpolyml/poly_specific.cpp
index 47288e21..e71e5af8 100644
--- a/libpolyml/poly_specific.cpp
+++ b/libpolyml/poly_specific.cpp
@@ -1,455 +1,455 @@
/*
Title: poly_specific.cpp - Poly/ML specific RTS calls.
Copyright (c) 2006, 2015-17, 2019, 2021 David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* This module is used for various run-time calls that are either in the
PolyML structure or otherwise specific to Poly/ML. */
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x) 0
#endif
#ifdef HAVE_STRING_H
#include
#endif
#include "globals.h"
#include "poly_specific.h"
#include "arb.h"
#include "mpoly.h"
#include "sys.h"
#include "machine_dep.h"
#include "polystring.h"
#include "run_time.h"
#include "version.h"
#include "save_vec.h"
#include "version.h"
#include "memmgr.h"
#include "processes.h"
#include "gc.h"
#include "rtsentry.h"
// RTS entry points implemented in this module. Each is exported to ML
// code through the polySpecificEPT table at the end of the file.
extern "C" {
POLYEXTERNALSYMBOL POLYUNSIGNED PolySpecificGeneral(FirstArgument threadId, PolyWord code, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetABI();
POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableClosure(FirstArgument threadId, PolyWord closure);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyCopyByteVecToClosure(FirstArgument threadId, PolyWord byteVec, PolyWord closure);
POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeConstant(PolyWord closure, PolyWord offset, PolyWord c, PolyWord flags);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCodeConstant(PolyWord closure, PolyWord offset, PolyWord flags);
POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeByte(PolyWord closure, PolyWord offset, PolyWord c);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCodeByte(PolyWord closure, PolyWord offset);
POLYEXTERNALSYMBOL POLYUNSIGNED PolySortArrayOfAddresses(PolyWord array);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest4(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest5(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4, PolyWord arg5);
}
#define SAVE(x) taskData->saveVec.push(x)
#ifndef GIT_VERSION
#define GIT_VERSION ""
#endif
// Dispatcher for the legacy poly-specific general call. The request code
// selects the operation; unknown codes raise an ML Fail exception.
Handle poly_dispatch_c(TaskData *taskData, Handle args, Handle code)
{
    unsigned request = get_C_unsigned(taskData, DEREFWORD(code));
    switch (request)
    {
    case 9: // The GIT version string recorded when the RTS was built.
        return SAVE(C_string_to_Poly(taskData, GIT_VERSION));
    case 10: // The RTS version string, prefixed by the architecture.
    {
        const char *versionString;
        switch (machineDependent->MachineArchitecture())
        {
        case MA_Interpreted: versionString = "Portable-" TextVersion; break;
        case MA_I386: versionString = "I386-" TextVersion; break;
        case MA_X86_64: versionString = "X86_64-" TextVersion; break;
        case MA_X86_64_32: versionString = "X86_64_32-" TextVersion; break;
        case MA_Arm64: versionString = "Arm64-" TextVersion; break;
        case MA_Arm64_32: versionString = "Arm64_32-" TextVersion; break;
        default: versionString = "Unknown-" TextVersion; break;
        }
        return SAVE(C_string_to_Poly(taskData, versionString));
    }
    case 12: // The architecture name.
        // Used in InitialPolyML.ML for PolyML.architecture
    {
        const char *archName;
        switch (machineDependent->MachineArchitecture())
        {
        case MA_Interpreted: archName = "Interpreted"; break;
        case MA_I386: archName = "I386"; break;
        case MA_X86_64: archName = "X86_64"; break;
        case MA_X86_64_32: archName = "X86_64_32"; break;
        case MA_Arm64: archName = "Arm64"; break;
        case MA_Arm64_32: archName = "Arm64_32"; break;
        default: archName = "Unknown"; break;
        }
        return SAVE(C_string_to_Poly(taskData, archName));
    }
    case 19: // Help text for the RTS command-line arguments.
        return SAVE(C_string_to_Poly(taskData, RTSArgHelp()));
    default:
    {
        char msg[100];
        sprintf(msg, "Unknown poly-specific function: %d", request);
        raise_exception_string(taskData, EXC_Fail, msg);
        return 0;
    }
    }
}
// General interface to poly-specific. Ideally the various cases will be made into
// separate functions.
// Standard RTS-call wrapper: bracket the work with PreRTSCall/PostRTSCall,
// protect the arguments on the save vector and swallow ML exceptions,
// returning TAGGED(0) in that case.
POLYUNSIGNED PolySpecificGeneral(FirstArgument threadId, PolyWord code, PolyWord arg)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle pushedCode = taskData->saveVec.push(code);
Handle pushedArg = taskData->saveVec.push(arg);
Handle result = 0;
try {
result = poly_dispatch_c(taskData, pushedArg, pushedCode);
} catch (...) { } // If an ML exception is raised
taskData->saveVec.reset(reset);
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
// Return the ABI - i.e. the calling conventions used when calling external functions.
// Result is a tagged integer: 0 = 32-bit, 1 = 64-bit Unix, 2 = 64-bit Windows.
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetABI()
{
// Return the ABI. For 64-bit we need to know if this is Windows.
#if (SIZEOF_VOIDP == 8)
#if (defined(_WIN32) || defined(__CYGWIN__))
return TAGGED(2).AsUnsigned(); // 64-bit Windows
#else
return TAGGED(1).AsUnsigned(); // 64-bit Unix
#endif
#else
return TAGGED(0).AsUnsigned(); // 32-bit Unix and Windows
#endif
}
// Code generation - Code is initially allocated in a byte segment. When all the
// values have been set apart from any addresses the byte segment is copied into
// a mutable code segment.
// Copy the byte vector into code space.
// Allocates a code cell (retrying after a GC if necessary), copies the
// byte vector into it via the writable mapping, stores the code address
// in the closure and locks the closure. Always returns TAGGED(0).
POLYUNSIGNED PolyCopyByteVecToClosure(FirstArgument threadId, PolyWord byteVec, PolyWord closure)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle pushedByteVec = taskData->saveVec.push(byteVec);
Handle pushedClosure = taskData->saveVec.push(closure);
PolyObject *result = 0;
#ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP
// On Apple Silicon the JIT region must be switched to writable first.
pthread_jit_write_protect_np(false);
#endif
try {
if (!pushedByteVec->WordP()->IsByteObject())
raise_fail(taskData, "Not byte data area");
if (pushedClosure->WordP()->Length() != sizeof(PolyObject*)/sizeof(PolyWord))
raise_fail(taskData, "Invalid closure size");
if (!pushedClosure->WordP()->IsMutable())
raise_fail(taskData, "Closure is not mutable");
do {
// Re-read the cell each iteration: a GC may have moved it and the
// save-vector handle tracks the move.
PolyObject *initCell = pushedByteVec->WordP();
POLYUNSIGNED requiredSize = initCell->Length();
result = gMem.AllocCodeSpace(requiredSize);
if (result == 0)
{
// Could not allocate - must GC.
if (!QuickGC(taskData, pushedByteVec->WordP()->Length()))
raise_fail(taskData, "Insufficient memory");
}
else memcpy(gMem.SpaceForObjectAddress(result)->writeAble((byte*)result), initCell, requiredSize * sizeof(PolyWord));
} while (result == 0);
}
catch (...) {} // If an ML exception is raised
// Store the code address in the closure.
*((PolyObject**)pushedClosure->WordP()) = result;
// Lock the closure.
pushedClosure->WordP()->SetLengthWord(pushedClosure->WordP()->LengthWord() & ~_OBJ_MUTABLE_BIT);
#ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP
pthread_jit_write_protect_np(true);
#endif
taskData->saveVec.reset(reset);
taskData->PostRTSCall();
return TAGGED(0).AsUnsigned();
}
// Code generation - Lock a mutable code segment and return the original address.
// Currently this does not allocate so other than the exception it could
// be a fast call.
// Clears the mutable bit on the code cell referenced by the closure and
// flushes the instruction cache. Always returns TAGGED(0).
POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableClosure(FirstArgument threadId, PolyWord closure)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
PolyObject *codeObj = *(PolyObject**)(closure.AsObjPtr());
#ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP
// On Apple Silicon the JIT region must be switched to writable first.
pthread_jit_write_protect_np(false);
#endif
try {
if (!codeObj->IsCodeObject() || !codeObj->IsMutable())
raise_fail(taskData, "Not mutable code area");
POLYUNSIGNED segLength = codeObj->Length();
// Rewrite the length word via the writable mapping to drop the mutable bit.
gMem.SpaceForObjectAddress(codeObj)->writeAble(codeObj)->SetLengthWord(segLength, F_CODE_OBJ);
// Flush cache on ARM at least.
machineDependent->FlushInstructionCache(codeObj, segLength * sizeof(PolyWord));
// In the future it may be necessary to return a different address here.
// N.B. The code area should only have execute permission in the native
// code version, not the interpreted version.
}
catch (...) {} // If an ML exception is raised
#ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP
pthread_jit_write_protect_np(true);
#endif
taskData->saveVec.reset(reset);
taskData->PostRTSCall();
return TAGGED(0).AsUnsigned();
}
// Set code constant. This can be a fast call.
// This is in the RTS both because we pass a closure in here and cannot have
// code addresses in 32-in-64 and also because we need to ensure there is no
// possibility of a GC while the code is an inconsistent state.
// flags selects the encoding: 0 = absolute PolyWord, 1 = 4-byte X86
// relative, 2 = absolute uintptr_t. Always returns TAGGED(0).
POLYUNSIGNED PolySetCodeConstant(PolyWord closure, PolyWord offset, PolyWord cWord, PolyWord flags)
{
    byte *pointer;
#ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP
    pthread_jit_write_protect_np(false);
#endif
    // Previously we passed the code address in here and we need to
    // retain that for legacy code. This is now the closure.
    if (closure.AsObjPtr()->IsCodeObject())
        pointer = closure.AsCodePtr();
    else pointer = *(POLYCODEPTR*)(closure.AsObjPtr());
    // pointer is the start of the code segment.
    // c will usually be an address.
    // offset is a byte offset
    pointer += offset.UnTaggedUnsigned();
    // All stores must go through the writable alias: on platforms with
    // separate write/execute mappings the execute address is not writable.
    byte* writeable = gMem.SpaceForAddress(pointer)->writeAble(pointer);
    switch (UNTAGGED(flags))
    {
    case 0: // Absolute constant - size PolyWord
        {
            POLYUNSIGNED c = cWord.AsUnsigned();
#ifdef WORDS_BIGENDIAN
            // This is used to store constants in the constant area
            // on the interpreted version.
            for (unsigned i = sizeof(PolyWord); i > 0; i--)
            {
                writeable[i-1] = (byte)(c & 255);
                c >>= 8;
            }
#else
            for (unsigned i = 0; i < sizeof(PolyWord); i++)
            {
                writeable[i] = (byte)(c & 255);
                c >>= 8;
            }
#endif
            break;
        }
    case 1: // Relative constant - X86 - size 4 bytes
        {
            // The offset is relative to the END of the constant.
            byte *target;
            // In 32-in-64 we pass in the closure address here
            // rather than the code address.
            if (cWord.AsObjPtr()->IsCodeObject())
                target = cWord.AsCodePtr();
            else target = *(POLYCODEPTR*)(cWord.AsObjPtr());
            size_t c = target - pointer - 4;
            // The displacement is always 4 bytes on X86, even in 64-bit mode.
            for (unsigned i = 0; i < 4; i++)
            {
                writeable[i] = (byte)(c & 255);
                c >>= 8;
            }
            break;
        }
    case 2: // Absolute constant - size uintptr_t
        // This is the same as case 0 except in 32-in-64 when
        // it is an absolute address rather than an object pointer.
        {
            uintptr_t c = (uintptr_t)(cWord.AsObjPtr());
            // Fix: store through the writable alias rather than the execute
            // address, consistent with cases 0 and 1 above. Writing via
            // "pointer" faults where the code area is mapped execute-only.
            for (unsigned i = 0; i < sizeof(uintptr_t); i++)
            {
                writeable[i] = (byte)(c & 255);
                c >>= 8;
            }
            break;
        }
    }
#ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP
    pthread_jit_write_protect_np(true);
#endif
    return TAGGED(0).AsUnsigned();
}
// Get a code constant. This is only used for debugging.
// Only flags value 0 (absolute PolyWord constant) is handled; any other
// flag returns TAGGED(0).
POLYUNSIGNED PolyGetCodeConstant(PolyWord closure, PolyWord offset, PolyWord flags)
{
byte* pointer = *(POLYCODEPTR*)(closure.AsObjPtr());
// offset is a byte offset
pointer += offset.UnTaggedUnsigned();
switch (UNTAGGED(flags))
{
case 0: // Absolute constant - size PolyWord
{
// Reassemble the word byte-by-byte, mirroring the byte order used
// by PolySetCodeConstant case 0.
POLYUNSIGNED c = 0;
#ifdef WORDS_BIGENDIAN
for (unsigned i = 0; i < sizeof(PolyWord); i++)
c = (c << 8) | pointer[i];
#else
for (unsigned i = sizeof(PolyWord); i > 0; i--)
c = (c << 8) | pointer[i-1];
#endif
return c;
}
}
// For the moment just handle that case.
return TAGGED(0).AsUnsigned();
}
// Set a code byte. This needs to be in the RTS because it uses the closure
// to find the code cell and must store via the writable mapping.
POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeByte(PolyWord closure, PolyWord offset, PolyWord cWord)
{
    byte *codeAddr = *(POLYCODEPTR*)(closure.AsObjPtr());
    byte *writeAddr = gMem.SpaceForAddress(codeAddr)->writeAble(codeAddr);
    POLYUNSIGNED byteOffset = UNTAGGED_UNSIGNED(offset);
    writeAddr[byteOffset] = (byte)UNTAGGED_UNSIGNED(cWord);
    return TAGGED(0).AsUnsigned();
}
// Read one byte from the code cell referenced by the closure and return
// it as a tagged integer.
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCodeByte(PolyWord closure, PolyWord offset)
{
    byte *codeAddr = *(POLYCODEPTR*)(closure.AsObjPtr());
    POLYUNSIGNED byteOffset = UNTAGGED_UNSIGNED(offset);
    return TAGGED(codeAddr[byteOffset]).AsUnsigned();
}
static int compare(const void *a, const void *b)
{
PolyWord *av = (PolyWord*)a;
PolyWord *bv = (PolyWord*)b;
if ((*av).IsTagged() || (*bv).IsTagged()) return 0; // Shouldn't happen
PolyObject *ao = (*av).AsObjPtr(), *bo = (*bv).AsObjPtr();
if (ao->Length() < 1 || bo->Length() < 1) return 0; // Shouldn't happen
if (ao->Get(0).AsUnsigned() < bo->Get(0).AsUnsigned())
return -1;
if (ao->Get(0).AsUnsigned() > bo->Get(0).AsUnsigned())
return 1;
return 0;
}
// Sort an array of addresses. This is used in the code-generator to search for
// duplicates in the address area. The argument is an array of pairs. The first
// item of each pair is an address, the second is an identifier of some kind.
// Returns TAGGED(1) on success, TAGGED(0) if the argument is not a
// mutable data cell.
POLYEXTERNALSYMBOL POLYUNSIGNED PolySortArrayOfAddresses(PolyWord array)
{
    if (!array.IsDataPtr())
        return (TAGGED(0)).AsUnsigned();
    PolyObject *arrayObj = array.AsObjPtr();
    if (!arrayObj->IsMutable())
        return (TAGGED(0)).AsUnsigned();
    // Each element is a single word referring to a pair cell; sort them
    // in place with the comparator above.
    qsort(arrayObj, arrayObj->Length(), sizeof(PolyWord), compare);
    return (TAGGED(1)).AsUnsigned();
}
// Test function: returns the argument selected by the tagged value of
// arg1 (1-4), or TAGGED(0) for anything else.
POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest4(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4)
{
    POLYUNSIGNED selector = arg1.UnTaggedUnsigned();
    if (selector == 1) return arg1.AsUnsigned();
    if (selector == 2) return arg2.AsUnsigned();
    if (selector == 3) return arg3.AsUnsigned();
    if (selector == 4) return arg4.AsUnsigned();
    return TAGGED(0).AsUnsigned();
}
// Test function: returns the argument selected by the tagged value of
// arg1 (1-5), or TAGGED(0) for anything else.
POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest5(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4, PolyWord arg5)
{
    POLYUNSIGNED selector = arg1.UnTaggedUnsigned();
    if (selector == 1) return arg1.AsUnsigned();
    if (selector == 2) return arg2.AsUnsigned();
    if (selector == 3) return arg3.AsUnsigned();
    if (selector == 4) return arg4.AsUnsigned();
    if (selector == 5) return arg5.AsUnsigned();
    return TAGGED(0).AsUnsigned();
}
// Entry-point table mapping names to the RTS functions above; used when
// resolving entry points during import and bootstrapping.
struct _entrypts polySpecificEPT[] =
{
{ "PolySpecificGeneral", (polyRTSFunction)&PolySpecificGeneral},
{ "PolyGetABI", (polyRTSFunction)&PolyGetABI },
{ "PolyCopyByteVecToClosure", (polyRTSFunction)&PolyCopyByteVecToClosure },
{ "PolyLockMutableClosure", (polyRTSFunction)&PolyLockMutableClosure },
{ "PolySetCodeConstant", (polyRTSFunction)&PolySetCodeConstant },
{ "PolyGetCodeConstant", (polyRTSFunction)&PolyGetCodeConstant },
{ "PolySetCodeByte", (polyRTSFunction)&PolySetCodeByte },
{ "PolyGetCodeByte", (polyRTSFunction)&PolyGetCodeByte },
{ "PolySortArrayOfAddresses", (polyRTSFunction)&PolySortArrayOfAddresses },
{ "PolyTest4", (polyRTSFunction)&PolyTest4 },
{ "PolyTest5", (polyRTSFunction)&PolyTest5 },
{ NULL, NULL} // End of list.
};
diff --git a/libpolyml/process_env.cpp b/libpolyml/process_env.cpp
index fa5ea179..145594d1 100644
--- a/libpolyml/process_env.cpp
+++ b/libpolyml/process_env.cpp
@@ -1,669 +1,669 @@
/*
Title: Process environment.
Copyright (c) 2000-8, 2016-17, 2020
David C. J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_STDIO_H
#include
#endif
#ifdef HAVE_ERRNO_H
#include
#endif
#ifdef HAVE_STRING_H
#include
#endif
#ifdef HAVE_STDLIB_H
#include
#endif
#ifdef HAVE_SYS_PARAM_H
#include
#endif
#ifdef HAVE_UNISTD_H
#include
#endif
#ifdef HAVE_SYS_WAIT_H
#include
#endif
#if (defined(__CYGWIN__) || defined(_WIN32))
#include
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x) 0
#endif
// Include this next before errors.h since in WinCE at least the winsock errors are defined there.
#if (defined(_WIN32))
#include
#include
#define NOMEMORY ERROR_NOT_ENOUGH_MEMORY
#undef ENOMEM
#else
typedef char TCHAR;
#define _tgetenv getenv
#define NOMEMORY ENOMEM
#endif
#include "globals.h"
#include "sys.h"
#include "run_time.h"
#include "process_env.h"
#include "arb.h"
#include "mpoly.h"
#include "gc.h"
#include "scanaddrs.h"
#include "polystring.h"
#include "save_vec.h"
#include "process_env.h"
#include "rts_module.h"
#include "machine_dep.h"
#include "processes.h"
#include "locking.h"
#include "errors.h"
#include "rtsentry.h"
#include "version.h"
extern "C" {
POLYEXTERNALSYMBOL void PolyFinish(FirstArgument threadId, PolyWord arg);
POLYEXTERNALSYMBOL void PolyTerminate(FirstArgument threadId, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProcessEnvGeneral(FirstArgument threadId, PolyWord code, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProcessEnvErrorName(FirstArgument threadId, PolyWord syserr);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProcessEnvErrorMessage(FirstArgument threadId, PolyWord syserr);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProcessEnvErrorFromString(FirstArgument threadId, PolyWord string);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetMaxAllocationSize();
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetMaxStringSize();
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetPolyVersionNumber();
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetFunctionName(FirstArgument threadId, PolyWord fnAddr);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetProcessName(FirstArgument threadId);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCommandlineArguments(FirstArgument threadId);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetEnv(FirstArgument threadId, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetEnvironment(FirstArgument threadId);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProcessEnvSuccessValue(FirstArgument threadId);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProcessEnvFailureValue(FirstArgument threadId);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProcessEnvSystem(FirstArgument threadId, PolyWord arg);
}
#define SAVE(x) taskData->saveVec.push(x)
#define ALLOC(n) alloc_and_save(taskData, n)
#if (defined(_WIN32))
#define ISPATHSEPARATOR(c) ((c) == '\\' || (c) == '/')
#define DEFAULTSEPARATOR "\\"
#else
#define ISPATHSEPARATOR(c) ((c) == '/')
#define DEFAULTSEPARATOR "/"
#endif
#ifdef _MSC_VER
// Don't tell me about ISO C++ changes.
#pragma warning(disable:4996)
#endif
// "environ" is declared in the headers on some systems but not all.
// Oddly, declaring it within process_env_dispatch_c causes problems
// on mingw where "environ" is actually a function.
#if __APPLE__
// On Mac OS X there may be problems accessing environ directly.
#include
#define environ (*_NSGetEnviron())
#else
extern char **environ;
#endif
#ifdef __CYGWIN__
// Cygwin requires spawnvp to avoid the significant overhead of vfork
// but it doesn't seem to be thread-safe. Run it on the main thread
// to be sure.
class CygwinSpawnRequest: public MainThreadRequest
{
public:
CygwinSpawnRequest(char **argv): MainThreadRequest(MTP_CYGWINSPAWN), spawnArgv(argv) {}
virtual void Perform();
char **spawnArgv;
int pid;
};
void CygwinSpawnRequest::Perform()
{
pid = spawnvp(_P_NOWAIT, "/bin/sh", spawnArgv);
}
#endif
// These are now just legacy calls.
static Handle process_env_dispatch_c(TaskData *taskData, Handle args, Handle code)
{
unsigned c = get_C_unsigned(taskData, DEREFWORD(code));
switch (c)
{
case 1: /* Return the argument list. */
// This is used in the pre-built compilers.
return convert_string_list(taskData, userOptions.user_arg_count, userOptions.user_arg_strings);
default:
{
char msg[100];
sprintf(msg, "Unknown environment function: %d", c);
raise_exception_string(taskData, EXC_Fail, msg);
return 0;
}
}
}
// General interface to process-env. Ideally the various cases will be made into
// separate functions.
POLYUNSIGNED PolyProcessEnvGeneral(FirstArgument threadId, PolyWord code, PolyWord arg)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle pushedCode = taskData->saveVec.push(code);
Handle pushedArg = taskData->saveVec.push(arg);
Handle result = 0;
try {
result = process_env_dispatch_c(taskData, pushedArg, pushedCode);
}
catch (KillException &) {
processes->ThreadExit(taskData); // May test for kill
}
catch (...) { } // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
// Terminate normally with a result code.
void PolyFinish(FirstArgument threadId, PolyWord arg)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
int i = get_C_int(taskData, arg);
// Cause the other threads to exit and set the result code.
processes->RequestProcessExit(i);
// Exit this thread
processes->ThreadExit(taskData); // Doesn't return.
}
// Terminate without running the atExit list or flushing buffers
void PolyTerminate(FirstArgument threadId, PolyWord arg)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
int i = get_C_int(taskData, arg);
_exit(i); // Doesn't return.
}
// Get the name of a numeric error message.
POLYUNSIGNED PolyProcessEnvErrorName(FirstArgument threadId, PolyWord syserr)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
int e = (int)syserr.AsObjPtr()->Get(0).AsSigned();
// First look to see if we have the name in the error table. They should generally all be there.
const char *errorMsg = stringFromErrorCode(e);
if (errorMsg != NULL)
result = taskData->saveVec.push(C_string_to_Poly(taskData, errorMsg));
else
{ // If it isn't in the table.
char buff[40];
sprintf(buff, "ERROR%0d", e);
result = taskData->saveVec.push(C_string_to_Poly(taskData, buff));
}
}
catch (...) { } // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
/* Get the explanatory message for an error. */
POLYUNSIGNED PolyProcessEnvErrorMessage(FirstArgument threadId, PolyWord syserr)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
result = errorMsg(taskData, (int)syserr.AsObjPtr()->Get(0).AsSigned());
}
catch (...) { } // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
// Try to convert an error string to an error number.
POLYUNSIGNED PolyProcessEnvErrorFromString(FirstArgument threadId, PolyWord string)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
char buff[40];
// Get the string.
Poly_string_to_C(string, buff, sizeof(buff));
// Look the string up in the table.
int err = 0;
if (errorCodeFromString(buff, &err))
result = Make_sysword(taskData, err);
else if (strncmp(buff, "ERROR", 5) == 0)
// If we don't find it then it may have been a constructed error name.
result = Make_sysword(taskData, atoi(buff+5));
else result = Make_sysword(taskData, 0); // Return 0w0 if it isn't there.
}
catch (...) { } // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
// Return the maximum size of a cell that can be allocated on the heap.
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetMaxAllocationSize()
{
return TAGGED(MAX_OBJECT_SIZE).AsUnsigned();
}
// Return the maximum string size (in bytes).
// It is the maximum number of bytes in a segment less one word for the length field.
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetMaxStringSize()
{
return TAGGED((MAX_OBJECT_SIZE) * sizeof(PolyWord) - sizeof(PolyWord)).AsUnsigned();
}
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetPolyVersionNumber()
{
return TAGGED(POLY_version_number).AsUnsigned();
}
// Return the function name associated with a piece of compiled code.
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetFunctionName(FirstArgument threadId, PolyWord fnAddr)
{
TaskData *taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
if (fnAddr.IsTagged()) raise_fail(taskData, "Not a code pointer");
PolyObject *pt = fnAddr.AsObjPtr();
// In 32-in-64 this may be a closure and the first word is the absolute address of the code.
if (pt->IsClosureObject())
{
// It may not be set yet.
pt = *(PolyObject**)pt;
if (((uintptr_t)pt & 1) == 1) raise_fail(taskData, "Not a code pointer");
}
if (pt->IsCodeObject()) /* Should now be a code object. */
{
/* Compiled code. This is the first constant in the constant area. */
- PolyWord *codePt = pt->ConstPtrForCode();
+ PolyWord *codePt = machineDependent->ConstPtrForCode(pt);
PolyWord name = codePt[0];
/* May be zero indicating an anonymous segment - return null string. */
if (name == PolyWord::FromUnsigned(0))
result = taskData->saveVec.push(C_string_to_Poly(taskData, ""));
else result = taskData->saveVec.push(name);
}
else raise_fail(taskData, "Not a code pointer");
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
// Get the command line process name.
POLYUNSIGNED PolyGetProcessName(FirstArgument threadId)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
result = taskData->saveVec.push(C_string_to_Poly(taskData, userOptions.programName));
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
// Get the command line arguments.
POLYUNSIGNED PolyGetCommandlineArguments(FirstArgument threadId)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
result = convert_string_list(taskData, userOptions.user_arg_count, userOptions.user_arg_strings);
}
catch (KillException&) {
processes->ThreadExit(taskData); // May test for kill
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
/* Return a string from the environment. */
POLYUNSIGNED PolyGetEnv(FirstArgument threadId, PolyWord arg)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle pushedArg = taskData->saveVec.push(arg);
Handle result = 0;
try {
TempString buff(pushedArg->Word());
if (buff == 0)
raise_syscall(taskData, "Insufficient memory", NOMEMORY);
TCHAR * res = _tgetenv(buff);
if (res == NULL)
raise_syscall(taskData, "Not Found", 0);
result = taskData->saveVec.push(C_string_to_Poly(taskData, res));
}
catch (KillException&) {
processes->ThreadExit(taskData); // May test for kill
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
// Return the whole environment. Only available in Posix.ProcEnv.
POLYUNSIGNED PolyGetEnvironment(FirstArgument threadId)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
/* Count the environment strings */
int env_count = 0;
while (environ[env_count] != NULL) env_count++;
result = convert_string_list(taskData, env_count, environ);
}
catch (KillException&) {
processes->ThreadExit(taskData); // May test for kill
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
/* Return the success value. */
POLYUNSIGNED PolyProcessEnvSuccessValue(FirstArgument threadId)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
result = Make_fixed_precision(taskData, EXIT_SUCCESS);
}
catch (KillException&) {
processes->ThreadExit(taskData); // May test for kill
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
/* Return a failure value. */
POLYUNSIGNED PolyProcessEnvFailureValue(FirstArgument threadId)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle result = 0;
try {
result = Make_fixed_precision(taskData, EXIT_FAILURE);
}
catch (KillException&) {
processes->ThreadExit(taskData); // May test for kill
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
/* Run command. */
POLYUNSIGNED PolyProcessEnvSystem(FirstArgument threadId, PolyWord arg)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle reset = taskData->saveVec.mark();
Handle pushedArg = taskData->saveVec.push(arg);
Handle result = 0;
try {
TempString buff(pushedArg->Word());
if (buff == 0) raise_syscall(taskData, "Insufficient memory", NOMEMORY);
int res = -1;
#if (defined(_WIN32) && ! defined(__CYGWIN__))
// Windows.
TCHAR * argv[4];
argv[0] = _tgetenv(_T("COMSPEC")); // Default CLI.
if (argv[0] == 0) argv[0] = (TCHAR*)_T("cmd.exe"); // Win NT etc.
argv[1] = (TCHAR*)_T("/c");
argv[2] = buff;
argv[3] = NULL;
// If _P_NOWAIT is given the result is the process handle.
// spawnvp does any necessary path searching if argv[0]
// does not contain a full path.
intptr_t pid = _tspawnvp(_P_NOWAIT, argv[0], argv);
if (pid == -1)
raise_syscall(taskData, "Function system failed", errno);
#else
// Cygwin and Unix
char* argv[4];
argv[0] = (char*)"sh";
argv[1] = (char*)"-c";
argv[2] = buff;
argv[3] = NULL;
#if (defined(__CYGWIN__))
CygwinSpawnRequest request(argv);
processes->MakeRootRequest(taskData, &request);
int pid = request.pid;
if (pid < 0)
raise_syscall(taskData, "Function system failed", errno);
#else
// We need to break this down so that we can unblock signals in the
// child process.
// The Unix "system" function seems to set SIGINT and SIGQUIT to
// SIG_IGN in the parent so that the wait will not be interrupted.
// That may make sense in a single-threaded application but is
// that right here?
int pid = vfork();
if (pid == -1)
raise_syscall(taskData, "Function system failed", errno);
else if (pid == 0)
{ // In child
sigset_t sigset;
sigemptyset(&sigset);
sigprocmask(SIG_SETMASK, &sigset, 0);
// Reset other signals?
execv("/bin/sh", argv);
_exit(1);
}
#endif
#endif
while (true)
{
try
{
// Test to see if the child has returned.
#if (defined(_WIN32) && ! defined(__CYGWIN__))
DWORD dwWait = WaitForSingleObject((HANDLE)pid, 0);
if (dwWait == WAIT_OBJECT_0)
{
DWORD dwResult;
BOOL fResult = GetExitCodeProcess((HANDLE)pid, &dwResult);
if (!fResult)
raise_syscall(taskData, "Function system failed", GetLastError());
CloseHandle((HANDLE)pid);
result = Make_fixed_precision(taskData, dwResult);
break;
}
else if (dwWait == WAIT_FAILED)
raise_syscall(taskData, "Function system failed", GetLastError());
else
{
// Wait for the process to exit or for the timeout
WaitHandle waiter((HANDLE)pid, 1000);
processes->ThreadPauseForIO(taskData, &waiter);
}
#else
int wRes = waitpid(pid, &res, WNOHANG);
if (wRes > 0)
break;
else if (wRes < 0)
{
raise_syscall(taskData, "Function system failed", errno);
}
// In Unix the best we can do is wait. This may be interrupted
// by SIGCHLD depending on where signals are processed.
// One possibility is for the main thread to somehow wake-up
// the thread when it processes a SIGCHLD.
else processes->ThreadPause(taskData);
#endif
}
catch (...)
{
// Either IOException or KillException.
// We're abandoning the wait. This will leave
// a zombie in Unix.
#if (defined(_WIN32) && ! defined(__CYGWIN__))
CloseHandle((HANDLE)pid);
#endif
throw;
}
}
result = Make_fixed_precision(taskData, res);
}
catch (KillException&) {
processes->ThreadExit(taskData); // May test for kill
}
catch (...) {} // If an ML exception is raised
taskData->saveVec.reset(reset); // Ensure the save vec is reset
taskData->PostRTSCall();
if (result == 0) return TAGGED(0).AsUnsigned();
else return result->Word().AsUnsigned();
}
struct _entrypts processEnvEPT[] =
{
{ "PolyFinish", (polyRTSFunction)&PolyFinish},
{ "PolyTerminate", (polyRTSFunction)&PolyTerminate},
{ "PolyProcessEnvGeneral", (polyRTSFunction)&PolyProcessEnvGeneral},
{ "PolyProcessEnvErrorName", (polyRTSFunction)&PolyProcessEnvErrorName},
{ "PolyProcessEnvErrorMessage", (polyRTSFunction)&PolyProcessEnvErrorMessage},
{ "PolyProcessEnvErrorFromString", (polyRTSFunction)&PolyProcessEnvErrorFromString},
{ "PolyGetMaxAllocationSize", (polyRTSFunction)&PolyGetMaxAllocationSize },
{ "PolyGetMaxStringSize", (polyRTSFunction)&PolyGetMaxStringSize },
{ "PolyGetPolyVersionNumber", (polyRTSFunction)&PolyGetPolyVersionNumber },
{ "PolyGetFunctionName", (polyRTSFunction)&PolyGetFunctionName },
{ "PolyGetProcessName", (polyRTSFunction)&PolyGetProcessName },
{ "PolyGetCommandlineArguments", (polyRTSFunction)&PolyGetCommandlineArguments },
{ "PolyGetEnv", (polyRTSFunction)& PolyGetEnv },
{ "PolyGetEnvironment", (polyRTSFunction)& PolyGetEnvironment },
{ "PolyProcessEnvSuccessValue", (polyRTSFunction)& PolyProcessEnvSuccessValue },
{ "PolyProcessEnvFailureValue", (polyRTSFunction)& PolyProcessEnvFailureValue },
{ "PolyProcessEnvSystem", (polyRTSFunction)& PolyProcessEnvSystem },
{ NULL, NULL} // End of list.
};
diff --git a/libpolyml/profiling.cpp b/libpolyml/profiling.cpp
index 2881ae55..68425b11 100644
--- a/libpolyml/profiling.cpp
+++ b/libpolyml/profiling.cpp
@@ -1,621 +1,622 @@
/*
Title: Profiling
Author: Dave Matthews, Cambridge University Computer Laboratory
Copyright (c) 2000-7
Cambridge University Technical Services Limited
- Further development copyright (c) David C.J. Matthews 2011, 2015, 2020
+ Further development copyright (c) David C.J. Matthews 2011, 2015, 2020-21
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_STDLIB_H
#include
#endif
#ifdef HAVE_MALLOC_H
#include
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x) 0
#endif
#include "globals.h"
#include "arb.h"
#include "processes.h"
#include "polystring.h"
#include "profiling.h"
#include "save_vec.h"
#include "rts_module.h"
#include "memmgr.h"
#include "scanaddrs.h"
#include "locking.h"
#include "run_time.h"
#include "sys.h"
#include "rtsentry.h"
+#include "machine_dep.h"
extern "C" {
POLYEXTERNALSYMBOL POLYUNSIGNED PolyProfiling(FirstArgument threadId, PolyWord mode);
}
static long mainThreadCounts[MTP_MAXENTRY];
static const char* const mainThreadText[MTP_MAXENTRY] =
{
"UNKNOWN",
"GARBAGE COLLECTION (sharing phase)",
"GARBAGE COLLECTION (mark phase)",
"GARBAGE COLLECTION (copy phase)",
"GARBAGE COLLECTION (update phase)",
"GARBAGE COLLECTION (minor collection)",
"Common data sharing",
"Exporting",
"Saving state",
"Loading saved state",
"Profiling",
"Setting signal handler",
"Cygwin spawn",
"Storing module",
"Loading module"
};
// Entries for store profiling
enum _extraStore {
EST_CODE = 0,
EST_STRING,
EST_BYTE,
EST_WORD,
EST_MUTABLE,
EST_MUTABLEBYTE,
EST_MAX_ENTRY
};
static POLYUNSIGNED extraStoreCounts[EST_MAX_ENTRY];
static const char * const extraStoreText[EST_MAX_ENTRY] =
{
"Function code",
"Strings",
"Byte data (long precision ints etc)",
"Unidentified word data",
"Unidentified mutable data",
"Mutable byte data (profiling counts)"
};
// Poly strings for "standard" counts. These are generated from the C strings
// above the first time profiling is activated.
static PolyWord psRTSString[MTP_MAXENTRY], psExtraStrings[EST_MAX_ENTRY], psGCTotal;
ProfileMode profileMode;
// If we are just profiling a single thread, this is the thread data.
static TaskData *singleThreadProfile = 0;
// The queue is processed every 400ms and an entry can be
// added every ms of CPU time by each thread.
#define PCQUEUESIZE 4000
static long queuePtr = 0;
static POLYCODEPTR pcQueue[PCQUEUESIZE];
// Increment, returning the original value.
static int incrAtomically(long & p)
{
#if (defined(HAVE_SYNC_FETCH))
return __sync_fetch_and_add(&p, 1);
#elif (defined(_WIN32))
long newValue = InterlockedIncrement(&p);
return newValue - 1;
#else
return p++;
#endif
}
// Decrement and return new value.
static int decrAtomically(long & p)
{
#if (defined(HAVE_SYNC_FETCH))
return __sync_sub_and_fetch(&p, 1);
#elif (defined(_WIN32))
return InterlockedDecrement(&p);
#else
return --p;
#endif
}
typedef struct _PROFENTRY
{
POLYUNSIGNED count;
PolyWord functionName;
struct _PROFENTRY *nextEntry;
} PROFENTRY, *PPROFENTRY;
class ProfileRequest: public MainThreadRequest
{
public:
ProfileRequest(unsigned prof, TaskData *pTask):
MainThreadRequest(MTP_PROFILING), mode(prof), pCallingThread(pTask), pTab(0), errorMessage(0) {}
~ProfileRequest();
virtual void Perform();
Handle extractAsList(TaskData *taskData);
private:
void getResults(void);
void getProfileResults(PolyWord *bottom, PolyWord *top);
PPROFENTRY newProfileEntry(void);
private:
unsigned mode;
TaskData *pCallingThread;
PPROFENTRY pTab;
public:
const char *errorMessage;
};
ProfileRequest::~ProfileRequest()
{
PPROFENTRY p = pTab;
while (p != 0)
{
PPROFENTRY toFree = p;
p = p->nextEntry;
free(toFree);
}
}
// Lock to serialise updates of counts. Only used during update.
// Not required when we print the counts since there's only one thread
// running then.
static PLock countLock;
// Get the profile object associated with a piece of code. Returns null if
// there isn't one, in particular if this is in the old format.
static PolyObject *getProfileObjectForCode(PolyObject *code)
{
ASSERT(code->IsCodeObject());
PolyWord *consts;
POLYUNSIGNED constCount;
- code->GetConstSegmentForCode(consts, constCount);
+ machineDependent->GetConstSegmentForCode(code, consts, constCount);
if (constCount < 2 || consts[1].AsUnsigned() == 0 || ! consts[1].IsDataPtr()) return 0;
PolyObject *profObject = consts[1].AsObjPtr();
if (profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1)
return profObject;
else return 0;
}
// Adds incr to the profile count for the function pointed at by
// pc or by one of its callers.
void addSynchronousCount(POLYCODEPTR fpc, POLYUNSIGNED incr)
{
// Check that the pc value is within the heap. It could be
// in the assembly code.
PolyObject *codeObj = gMem.FindCodeObject(fpc);
if (codeObj)
{
PolyObject *profObject = getProfileObjectForCode(codeObj);
PLocker locker(&countLock);
if (profObject)
profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + incr));
return;
}
// Didn't find it.
{
PLocker locker(&countLock);
incrAtomically(mainThreadCounts[MTP_USER_CODE]);
}
}
// newProfileEntry - Make a new entry in the list
PPROFENTRY ProfileRequest::newProfileEntry(void)
{
PPROFENTRY newEntry = (PPROFENTRY)malloc(sizeof(PROFENTRY));
if (newEntry == 0) { errorMessage = "Insufficient memory"; return 0; }
newEntry->nextEntry = pTab;
pTab = newEntry;
return newEntry;
}
// We don't use ScanAddress here because we're only interested in the
// objects themselves not the addresses in them.
// We have to build the list of results in C memory rather than directly in
// ML memory because we can't allocate in ML memory in the root thread.
void ProfileRequest::getProfileResults(PolyWord *bottom, PolyWord *top)
{
PolyWord *ptr = bottom;
while (ptr < top)
{
ptr++; // Skip the length word
PolyObject *obj = (PolyObject*)ptr;
if (obj->ContainsForwardingPtr())
{
// This used to be necessary when code objects were held in the
// general heap. Now that we only ever scan code and permanent
// areas it's probably not needed.
while (obj->ContainsForwardingPtr())
obj = obj->GetForwardingPtr();
ASSERT(obj->ContainsNormalLengthWord());
ptr += obj->Length();
}
else
{
ASSERT(obj->ContainsNormalLengthWord());
if (obj->IsCodeObject())
{
- PolyWord *firstConstant = obj->ConstPtrForCode();
+ PolyWord *firstConstant = machineDependent->ConstPtrForCode(obj);
PolyWord name = firstConstant[0];
PolyObject *profCount = getProfileObjectForCode(obj);
if (profCount)
{
POLYUNSIGNED count = profCount->Get(0).AsUnsigned();
if (count != 0)
{
if (name != TAGGED(0))
{
PPROFENTRY pEnt = newProfileEntry();
if (pEnt == 0) return;
pEnt->count = count;
pEnt->functionName = name;
}
profCount->Set(0, PolyWord::FromUnsigned(0));
}
}
} /* code object */
ptr += obj->Length();
} /* else */
} /* while */
}
void ProfileRequest::getResults(void)
// Print profiling information and reset profile counts.
{
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
{
MemSpace *space = *i;
// Permanent areas are filled with objects from the bottom.
getProfileResults(space->bottom, space->top); // Bottom to top
}
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
{
CodeSpace *space = *i;
getProfileResults(space->bottom, space->top);
}
{
POLYUNSIGNED gc_count =
mainThreadCounts[MTP_GCPHASESHARING]+
mainThreadCounts[MTP_GCPHASEMARK]+
mainThreadCounts[MTP_GCPHASECOMPACT] +
mainThreadCounts[MTP_GCPHASEUPDATE] +
mainThreadCounts[MTP_GCQUICK];
if (gc_count)
{
PPROFENTRY pEnt = newProfileEntry();
if (pEnt == 0) return; // Report insufficient memory?
pEnt->count = gc_count;
pEnt->functionName = psGCTotal;
}
}
for (unsigned k = 0; k < MTP_MAXENTRY; k++)
{
if (mainThreadCounts[k])
{
PPROFENTRY pEnt = newProfileEntry();
if (pEnt == 0) return; // Report insufficient memory?
pEnt->count = mainThreadCounts[k];
pEnt->functionName = psRTSString[k];
mainThreadCounts[k] = 0;
}
}
for (unsigned l = 0; l < EST_MAX_ENTRY; l++)
{
if (extraStoreCounts[l])
{
PPROFENTRY pEnt = newProfileEntry();
if (pEnt == 0) return; // Report insufficient memory?
pEnt->count = extraStoreCounts[l];
pEnt->functionName = psExtraStrings[l];
extraStoreCounts[l] = 0;
}
}
}
// Extract the accumulated results as an ML list of pairs of the count and the string.
Handle ProfileRequest::extractAsList(TaskData *taskData)
{
Handle saved = taskData->saveVec.mark();
Handle list = taskData->saveVec.push(ListNull);
for (PPROFENTRY p = pTab; p != 0; p = p->nextEntry)
{
Handle pair = alloc_and_save(taskData, 2);
Handle countValue = Make_arbitrary_precision(taskData, p->count);
pair->WordP()->Set(0, countValue->Word());
pair->WordP()->Set(1, p->functionName);
Handle next = alloc_and_save(taskData, sizeof(ML_Cons_Cell) / sizeof(PolyWord));
DEREFLISTHANDLE(next)->h = pair->Word();
DEREFLISTHANDLE(next)->t =list->Word();
taskData->saveVec.reset(saved);
list = taskData->saveVec.push(next->Word());
}
return list;
}
// We have had an asynchronous interrupt and found a potential PC but
// we're in a signal handler.
void incrementCountAsynch(POLYCODEPTR pc)
{
int q = incrAtomically(queuePtr);
if (q < PCQUEUESIZE) pcQueue[q] = pc;
}
// Called by the main thread to process the queue of PC values
void processProfileQueue()
{
if (queuePtr == 0) return;
while (1)
{
int q = queuePtr;
if (q >= PCQUEUESIZE)
incrAtomically(mainThreadCounts[MTP_USER_CODE]);
else addSynchronousCount(pcQueue[q], 1);
if (decrAtomically(queuePtr) == 0) break;
}
}
// Handle a SIGVTALRM or the simulated equivalent in Windows. This may be called
// at any time so we have to be careful. In particular in Linux this may be
// executed by a thread while holding a mutex so we must not do anything, such
// calling malloc, that could require locking.
void handleProfileTrap(TaskData *taskData, SIGNALCONTEXT *context)
{
if (singleThreadProfile != 0 && singleThreadProfile != taskData)
return;
/* If we are in the garbage-collector add the count to "gc_count"
otherwise try to find out where we are. */
if (mainThreadPhase == MTP_USER_CODE)
{
if (taskData == 0 || ! taskData->AddTimeProfileCount(context))
incrAtomically(mainThreadCounts[MTP_USER_CODE]);
// On Mac OS X all virtual timer interrupts seem to be directed to the root thread
// so all the counts will be "unknown".
}
else incrAtomically(mainThreadCounts[mainThreadPhase]);
}
// Called from the GC when allocation profiling is on.
void AddObjectProfile(PolyObject *obj)
{
ASSERT(obj->ContainsNormalLengthWord());
POLYUNSIGNED length = obj->Length();
if (obj->IsWordObject() && OBJ_HAS_PROFILE(obj->LengthWord()))
{
// It has a profile pointer. The last word should point to the
// closure or code of the allocating function. Add the size of this to the count.
ASSERT(length != 0);
PolyWord profWord = obj->Get(length-1);
ASSERT(profWord.IsDataPtr());
PolyObject *profObject = profWord.AsObjPtr();
ASSERT(profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1);
profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + length + 1));
}
// If it doesn't have a profile pointer add it to the appropriate count.
else if (obj->IsMutable())
{
if (obj->IsByteObject())
extraStoreCounts[EST_MUTABLEBYTE] += length+1;
else extraStoreCounts[EST_MUTABLE] += length+1;
}
else if (obj->IsCodeObject())
extraStoreCounts[EST_CODE] += length+1;
else if (obj->IsClosureObject())
{
ASSERT(0);
}
else if (obj->IsByteObject())
{
// Try to separate strings from other byte data. This is only
// approximate.
if (OBJ_IS_NEGATIVE(obj->LengthWord()))
extraStoreCounts[EST_BYTE] += length+1;
else
{
PolyStringObject *possString = (PolyStringObject*)obj;
POLYUNSIGNED bytes = length * sizeof(PolyWord);
// If the length of the string as given in the first word is sufficient
// to fit in the exact number of words then it's probably a string.
if (length >= 2 &&
possString->length <= bytes - sizeof(POLYUNSIGNED) &&
possString->length > bytes - 2 * sizeof(POLYUNSIGNED))
extraStoreCounts[EST_STRING] += length+1;
else
{
extraStoreCounts[EST_BYTE] += length+1;
}
}
}
else extraStoreCounts[EST_WORD] += length+1;
}
// Called from ML to control profiling.
static Handle profilerc(TaskData *taskData, Handle mode_handle)
/* Profiler - generates statistical profiles of the code.
   The parameter is an integer which determines the value to be profiled.
   When profiler is called it always resets the profiling and prints out any
   values which have been accumulated.
   If the parameter is 0 this is all it does,
   if the parameter is 1 then it produces time profiling,
   if the parameter is 2 it produces store profiling.
   3 - arbitrary precision emulation traps. */
{
    // Decode the ML-supplied mode argument into a plain unsigned.
    unsigned mode = get_C_unsigned(taskData, mode_handle->Word());
    {
        // Create any strings we need. We only need to do this once but
        // it must be done by a non-root thread since it needs a taskData object.
        // Don't bother locking. At worst we'll create some garbage.
        for (unsigned k = 0; k < MTP_MAXENTRY; k++)
        {
            if (psRTSString[k] == TAGGED(0))
                psRTSString[k] = C_string_to_Poly(taskData, mainThreadText[k]);
        }
        for (unsigned k = 0; k < EST_MAX_ENTRY; k++)
        {
            if (psExtraStrings[k] == TAGGED(0))
                psExtraStrings[k] = C_string_to_Poly(taskData, extraStoreText[k]);
        }
        if (psGCTotal == TAGGED(0))
            psGCTotal = C_string_to_Poly(taskData, "GARBAGE COLLECTION (total)");
    }
    // All these actions are performed by the root thread. Only profile
    // printing needs to be performed with all the threads stopped but it's
    // simpler to serialise all requests.
    ProfileRequest request(mode, taskData);
    processes->MakeRootRequest(taskData, &request);
    // An error (e.g. trying to start profiling while it is active) is
    // reported back to ML as an exception.
    if (request.errorMessage != 0) raise_exception_string(taskData, EXC_Fail, request.errorMessage);
    // Return the accumulated results as an ML list.
    return request.extractAsList(taskData);
}
// RTS entry point called from ML to control profiling.  Standard wrapper:
// find the task, bracket the call with Pre/PostRTSCall, protect the save
// vector, and convert the result handle back to a raw PolyWord.
POLYUNSIGNED PolyProfiling(FirstArgument threadId, PolyWord mode)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    Handle pushedMode = taskData->saveVec.push(mode);
    Handle result = 0;
    try {
        result = profilerc(taskData, pushedMode);
    } catch (...) { } // If an ML exception is raised
    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    // A null result means an exception was raised; return ML unit (TAGGED 0).
    if (result == 0) return TAGGED(0).AsUnsigned();
    else return result->Word().AsUnsigned();
}
// This is called from the root thread when all the ML threads have been paused.
void ProfileRequest::Perform()
{
    // Starting a new profiling mode while another is active is an error;
    // the caller must turn profiling off (mode 0) first.
    if (mode != kProfileOff && profileMode != kProfileOff)
    {
        // Profiling must be stopped first.
        errorMessage = "Profiling is currently active";
        return;
    }
    singleThreadProfile = 0; // Unless kProfileTimeThread is given this should be 0
    switch (mode)
    {
    case kProfileOff:
        // Turn off old profiling mechanism and print out accumulated results
        profileMode = kProfileOff;
        processes->StopProfiling();
        getResults();
        // Remove all the bitmaps to free up memory
        gMem.RemoveProfilingBitmaps();
        break;
    case kProfileTimeThread:
        // Restrict time profiling to the requesting thread only.
        singleThreadProfile = pCallingThread;
        // And drop through to kProfileTime
    case kProfileTime:
        profileMode = kProfileTime;
        processes->StartProfiling();
        break;
    // The remaining modes just record the new mode; the relevant
    // subsystems check profileMode themselves.
    case kProfileStoreAllocation:
        profileMode = kProfileStoreAllocation;
        break;
    case kProfileEmulation:
        profileMode = kProfileEmulation;
        break;
    case kProfileLiveData:
        profileMode = kProfileLiveData;
        break;
    case kProfileLiveMutables:
        profileMode = kProfileLiveMutables;
        break;
    case kProfileMutexContention:
        profileMode = kProfileMutexContention;
        break;
    default: /* do nothing */
        break;
    }
}
// Table of RTS entry points exported by this module to the ML world.
struct _entrypts profilingEPT[] =
{
    // Profiling
    { "PolyProfiling", (polyRTSFunction)&PolyProfiling},
    { NULL, NULL} // End of list.
};
// RTS module wrapper: hooks profiling state into module initialisation
// and garbage collection.
class Profiling: public RtsModule
{
public:
    virtual void Init(void);
    virtual void GarbageCollect(ScanAddress *process);
};
// Declare this. It will be automatically added to the table.
static Profiling profileModule;
// Module start-up: profiling begins switched off with zeroed counters.
void Profiling::Init(void)
{
    // Reset profiling counts.
    profileMode = kProfileOff;
    for (unsigned k = 0; k < MTP_MAXENTRY; k++) mainThreadCounts[k] = 0;
}
// GC hook: the cached ML strings used for profile reports are heap
// objects, so they must be reported as roots to every collection.
void Profiling::GarbageCollect(ScanAddress *process)
{
    // Process any strings in the table.
    for (unsigned k = 0; k < MTP_MAXENTRY; k++)
        process->ScanRuntimeWord(&psRTSString[k]);
    for (unsigned k = 0; k < EST_MAX_ENTRY; k++)
        process->ScanRuntimeWord(&psExtraStrings[k]);
    process->ScanRuntimeWord(&psGCTotal);
}
diff --git a/libpolyml/savestate.cpp b/libpolyml/savestate.cpp
index 02511b89..a560ccdd 100644
--- a/libpolyml/savestate.cpp
+++ b/libpolyml/savestate.cpp
@@ -1,2265 +1,2262 @@
/*
Title: savestate.cpp - Save and Load state
Copyright (c) 2007, 2015, 2017-19, 2021 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_STDIO_H
#include
#endif
#ifdef HAVE_WINDOWS_H
#include // For MAX_PATH
#endif
#ifdef HAVE_SYS_PARAM_H
#include // For MAX_PATH
#endif
#ifdef HAVE_ERRNO_H
#include
#endif
#ifdef HAVE_TIME_H
#include
#endif
#ifdef HAVE_SYS_TYPES_H
#include
#endif
#ifdef HAVE_SYS_STAT_H
#include
#endif
#ifdef HAVE_UNISTD_H
#include
#endif
#ifdef HAVE_STRING_H
#include
#endif
#ifdef HAVE_ASSERT_H
#include
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#if (defined(_WIN32))
#include
#define ERRORNUMBER _doserrno
#define NOMEMORY ERROR_NOT_ENOUGH_MEMORY
#else
typedef char TCHAR;
#define _T(x) x
#define _tfopen fopen
#define _tcscpy strcpy
#define _tcsdup strdup
#define _tcslen strlen
#define _fputtc fputc
#define _fputts fputs
#ifndef lstrcmpi
#define lstrcmpi strcasecmp
#endif
#define ERRORNUMBER errno
#define NOMEMORY ENOMEM
#endif
#include "globals.h"
#include "savestate.h"
#include "processes.h"
#include "run_time.h"
#include "polystring.h"
#include "scanaddrs.h"
#include "arb.h"
#include "memmgr.h"
#include "mpoly.h" // For exportTimeStamp
#include "exporter.h" // For CopyScan
#include "machine_dep.h"
#include "osmem.h"
#include "gc.h" // For FullGC.
#include "timing.h"
#include "rtsentry.h"
#include "check_objects.h"
#include "rtsentry.h"
#include "../polyexports.h" // For InitHeaderFromExport
#include "version.h" // For InitHeaderFromExport
#ifdef _MSC_VER
// Don't tell me about ISO C++ changes.
#pragma warning(disable:4996)
#endif
extern "C" {
POLYEXTERNALSYMBOL POLYUNSIGNED PolySaveState(FirstArgument threadId, PolyWord fileName, PolyWord depth);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadState(FirstArgument threadId, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowHierarchy(FirstArgument threadId);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyRenameParent(FirstArgument threadId, PolyWord childName, PolyWord parentName);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowParent(FirstArgument threadId, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyStoreModule(FirstArgument threadId, PolyWord name, PolyWord contents);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadModule(FirstArgument threadId, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadHierarchy(FirstArgument threadId, PolyWord arg);
POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetModuleDirectory(FirstArgument threadId);
}
// Helper class to close files on exit.
// RAII wrapper that closes a stdio stream when it goes out of scope.
class AutoClose {
public:
    // Takes ownership of [f]; a null pointer means "nothing to close".
    AutoClose(FILE *f = 0): handle(f) {}
    // Close the stream on destruction if one is still held.
    ~AutoClose()
    {
        if (handle != 0)
            ::fclose(handle);
    }
    // Implicit conversion so the wrapper can be passed straight to stdio calls.
    operator FILE*() { return handle; }
    // NOTE: assignment replaces the stored stream without closing the old one.
    FILE* operator = (FILE* p) { return (handle = p); }
private:
    FILE *handle; // The owned stream, or null.
};
// This is probably generally useful so may be moved into
// a general header file.
// Frees a malloc'd pointer automatically on scope exit.  BASE must be a
// pointer type whose value was allocated with malloc/strdup, since free()
// is used to release it.
// (Fix: the template parameter list had been lost, leaving an ill-formed
// declaration; restored as <typename BASE>.)
template <typename BASE> class AutoFree
{
public:
    // Takes ownership of [p]; null is fine (free(0) is a no-op).
    AutoFree(BASE p = 0): m_value(p) {}
    ~AutoFree() { free(m_value); }
    // Automatic conversions to the base type.
    operator BASE() { return m_value; }
    // NOTE: assignment replaces the stored pointer without freeing the old one.
    BASE operator = (BASE p) { return (m_value = p); }
private:
    BASE m_value;
};
#ifdef HAVE__FTELLI64
// fseek and ftell are only 32-bits in Windows.
#define off_t __int64
#define fseek _fseeki64
#define ftell _ftelli64
#endif
/*
* Structure definitions for the saved state files.
*/
#define SAVEDSTATESIGNATURE "POLYSAVE"
#define SAVEDSTATEVERSION 2
// File header for a saved state file. This appears as the first entry
// in the file.
// On-disk layout of the file header; written with fwrite, so it is only
// readable by a build with the same word size and struct packing.
typedef struct _savedStateHeader
{
    // These entries are primarily to check that we have a valid
    // saved state file before we try to interpret anything else.
    char headerSignature[8]; // Should contain SAVEDSTATESIGNATURE
    unsigned headerVersion; // Should contain SAVEDSTATEVERSION
    unsigned headerLength; // Number of bytes in the header
    unsigned segmentDescrLength; // Number of bytes in a descriptor
    // These entries contain the real data.
    off_t segmentDescr; // Position of segment descriptor table
    unsigned segmentDescrCount; // Number of segment descriptors in the table
    off_t stringTable; // Pointer to the string table (zero if none)
    size_t stringTableSize; // Size of string table
    unsigned parentNameEntry; // Position of parent name in string table (0 if top)
    time_t timeStamp; // The time stamp for this file.
    time_t parentTimeStamp; // The time stamp for the parent.
    void *originalBaseAddr; // Original base address (32-in-64 only)
} SavedStateHeader;
// Entry for segment table. This describes the segments on the disc that
// need to be loaded into memory.
// On-disk descriptor for one memory segment; one per permanent space,
// whether or not the segment's data is actually stored in this file.
typedef struct _savedStateSegmentDescr
{
    off_t segmentData; // Position of the segment data
    size_t segmentSize; // Size of the segment data
    off_t relocations; // Position of the relocation table
    unsigned relocationCount; // Number of entries in relocation table
    unsigned relocationSize; // Size of a relocation entry
    unsigned segmentFlags; // Segment flags (see SSF_ values)
    unsigned segmentIndex; // The index of this segment or the segment it overwrites
    void *originalAddress; // The base address when the segment was written.
} SavedStateSegmentDescr;
#define SSF_WRITABLE 1 // The segment contains mutable data
#define SSF_OVERWRITE 2 // The segment overwrites the data (mutable) in a parent.
#define SSF_NOOVERWRITE 4 // The segment must not be further overwritten
#define SSF_BYTES 8 // The segment contains only byte data
#define SSF_CODE 16 // The segment contains only code
// On-disk relocation record: rewrites one address within a segment when
// the saved state is reloaded at a different base address.
typedef struct _relocationEntry
{
    // Each entry indicates a location that has to be set to an address.
    // The location to be set is determined by adding "relocAddress" to the base address of
    // this segment (the one to which these relocations apply) and the value to store
    // by adding "targetAddress" to the base address of the segment indicated by "targetSegment".
    POLYUNSIGNED relocAddress; // The (byte) offset in this segment that we will set
    POLYUNSIGNED targetAddress; // The value to add to the base of the destination segment
    unsigned targetSegment; // The base segment. 0 is IO segment.
    ScanRelocationKind relKind; // The kind of relocation (processor dependent).
} RelocationEntry;
#define SAVE(x) taskData->saveVec.push(x)
/*
* Hierarchy table: contains information about last loaded or saved state.
*/
// Pointer to list of files loaded in last load.
// There's no need for a lock since the update is only made when all
// the ML threads have stopped.
// One entry per level in the saved-state hierarchy: the file loaded or
// saved at that level and the time stamp recorded in it.
// (Fix: the template argument of AutoFree had been stripped, leaving an
// ill-formed member declaration; restored as AutoFree<TCHAR*>.)
class HierarchyTable
{
public:
    HierarchyTable(const TCHAR *file, time_t time):
        fileName(_tcsdup(file)), timeStamp(time) { }
    AutoFree<TCHAR*> fileName; // malloc'd copy of the file name, freed automatically.
    time_t timeStamp;          // Time stamp recorded in the saved-state file.
};
HierarchyTable **hierarchyTable;
static unsigned hierarchyDepth;
static bool AddHierarchyEntry(const TCHAR *fileName, time_t timeStamp)
{
// Add an entry to the hierarchy table for this file.
HierarchyTable *newEntry = new HierarchyTable(fileName, timeStamp);
if (newEntry == 0) return false;
HierarchyTable **newTable =
(HierarchyTable **)realloc(hierarchyTable, sizeof(HierarchyTable *)*(hierarchyDepth+1));
if (newTable == 0) return false;
hierarchyTable = newTable;
hierarchyTable[hierarchyDepth++] = newEntry;
return true;
}
// Test whether we're overwriting a parent of ourself.
#if (defined(_WIN32) || defined(__CYGWIN__))
// Windows implementation: two paths name the same file iff they are on the
// same volume and share the same file index (the NTFS equivalent of an
// inode).  Any failure to open or query either file counts as "not same".
static bool sameFile(const TCHAR *x, const TCHAR *y)
{
    HANDLE hXFile = INVALID_HANDLE_VALUE, hYFile = INVALID_HANDLE_VALUE;
    bool result = false;
    hXFile = CreateFile(x, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hXFile == INVALID_HANDLE_VALUE) goto closeAndExit;
    hYFile = CreateFile(y, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hYFile == INVALID_HANDLE_VALUE) goto closeAndExit;
    BY_HANDLE_FILE_INFORMATION fileInfoX, fileInfoY;
    if (! GetFileInformationByHandle(hXFile, &fileInfoX)) goto closeAndExit;
    if (! GetFileInformationByHandle(hYFile, &fileInfoY)) goto closeAndExit;
    result = fileInfoX.dwVolumeSerialNumber == fileInfoY.dwVolumeSerialNumber &&
        fileInfoX.nFileIndexLow == fileInfoY.nFileIndexLow &&
        fileInfoX.nFileIndexHigh == fileInfoY.nFileIndexHigh;
closeAndExit:
    // Single exit point so both handles are always released.
    if (hXFile != INVALID_HANDLE_VALUE) CloseHandle(hXFile);
    if (hYFile != INVALID_HANDLE_VALUE) CloseHandle(hYFile);
    return result;
}
#else
// POSIX implementation: two paths name the same file iff both can be
// stat'ed and refer to the same inode on the same device.
static bool sameFile(const char *x, const char *y)
{
    struct stat statX, statY;
    // A path that cannot be stat'ed (e.g. does not exist) never matches.
    if (stat(x, &statX) != 0)
        return false;
    if (stat(y, &statY) != 0)
        return false;
    bool sameDevice = statX.st_dev == statY.st_dev;
    bool sameInode = statX.st_ino == statY.st_ino;
    return sameDevice && sameInode;
}
#endif
/*
* Saving state.
*/
// This class is used to create the relocations. It uses Exporter
// for this but this may perhaps be too heavyweight.
class SaveStateExport: public Exporter, public ScanAddress
{
public:
SaveStateExport(unsigned int h=0): Exporter(h), relocationCount(0) {}
public:
virtual void exportStore(void) {} // Not used.
private:
// ScanAddress overrides
- virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code);
+ virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code, intptr_t displacement);
// At the moment we should only get calls to ScanConstant.
virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
protected:
void setRelocationAddress(void *p, POLYUNSIGNED *reloc);
PolyWord createRelocation(PolyWord p, void *relocAddr);
unsigned relocationCount;
friend class SaveRequest;
};
// Generate the address relative to the start of the segment.
// Generate the address relative to the start of the segment.
// Stores into *reloc the byte offset of [p] within its memory-table area.
void SaveStateExport::setRelocationAddress(void *p, POLYUNSIGNED *reloc)
{
    unsigned area = findArea(p);
    POLYUNSIGNED offset = (POLYUNSIGNED)((char*)p - (char*)memTable[area].mtOriginalAddr);
    *reloc = offset;
}
// Create a relocation entry for an address at a given location.
// Create a relocation entry for an address at a given location.
// Writes one RelocationEntry describing the word at [relocAddr] whose value
// is the address [p]; returns [p] unchanged so the in-memory data is untouched.
PolyWord SaveStateExport::createRelocation(PolyWord p, void *relocAddr)
{
    RelocationEntry reloc;
    // Set the offset within the section we're scanning.
    setRelocationAddress(relocAddr, &reloc.relocAddress);
    void *addr = p.AsAddress();
    unsigned addrArea = findArea(addr);
    // Target is recorded as (segment index, byte offset within that segment).
    reloc.targetAddress = (POLYUNSIGNED)((char*)addr - (char*)memTable[addrArea].mtOriginalAddr);
    reloc.targetSegment = (unsigned)memTable[addrArea].mtIndex;
    reloc.relKind = PROCESS_RELOC_DIRECT;
    fwrite(&reloc, sizeof(reloc), 1, exportFile);
    relocationCount++;
    return p; // Don't change the contents
}
/* This is called for each constant within the code.
Print a relocation entry for the word and return a value that means
that the offset is saved in original word. */
-void SaveStateExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code)
+void SaveStateExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code, intptr_t displacement)
{
- PolyObject *p = GetConstantValue(addr, code);
+ PolyObject *p = GetConstantValue(addr, code, displacement);
if (p == 0)
return;
void *a = p;
unsigned aArea = findArea(a);
// We don't need a relocation if this is relative to the current segment
// since the relative address will already be right.
if (code == PROCESS_RELOC_I386RELATIVE && aArea == findArea(addr))
return;
// Set the value at the address to the offset relative to the symbol.
RelocationEntry reloc;
setRelocationAddress(addr, &reloc.relocAddress);
reloc.targetAddress = (POLYUNSIGNED)((char*)a - (char*)memTable[aArea].mtOriginalAddr);
reloc.targetSegment = (unsigned)memTable[aArea].mtIndex;
reloc.relKind = code;
fwrite(&reloc, sizeof(reloc), 1, exportFile);
relocationCount++;
}
// Request to the main thread to save data.
// Request to the main thread to save data.  Carries the file name and
// hierarchy level in, and any error message/code back out.
class SaveRequest: public MainThreadRequest
{
public:
    SaveRequest(const TCHAR *name, unsigned h): MainThreadRequest(MTP_SAVESTATE),
        fileName(name), newHierarchy(h),
        errorMessage(0), errCode(0) {}
    virtual void Perform();
    const TCHAR *fileName;    // Path of the file to write.
    unsigned newHierarchy;    // Hierarchy level being saved (1 = top level).
    const char *errorMessage; // Null on success, otherwise the failure text.
    int errCode;              // errno-style code accompanying errorMessage.
};
// This class is used to update references to objects that have moved. If
// we have copied an object into the area to be exported we may still have references
// to it from the stack or from RTS data structures. We have to ensure that these
// are updated.
// This is very similar to ProcessFixupAddress in sharedata.cpp
// Scanner that rewrites references to objects copied into the export area
// so that the stack and RTS data structures point at the new copies.
// Very similar to ProcessFixupAddress in sharedata.cpp.
class SaveFixupAddress: public ScanAddress
{
protected:
    virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
    virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { *pt = ScanObjectAddress(*pt); return 0; }
    virtual PolyObject *ScanObjectAddress(PolyObject *base);
public:
    void ScanCodeSpace(CodeSpace *space);
};
// Update a single word in place: only heap pointers (not tagged values or
// the zero word) need forwarding through ScanObjectAddress.
POLYUNSIGNED SaveFixupAddress::ScanAddressAt(PolyWord *pt)
{
    PolyWord val = *pt;
    if (val.IsDataPtr() && val != PolyWord::FromUnsigned(0))
        *pt = ScanObjectAddress(val.AsObjPtr());
    return 0;
}
// Returns the new address if the argument is the address of an object that
// has moved, otherwise returns the original.
// Returns the new address if the argument is the address of an object that
// has moved, otherwise returns the original.
PolyObject *SaveFixupAddress::ScanObjectAddress(PolyObject *obj)
{
    if (obj->ContainsForwardingPtr()) // tombstone is a pointer to a moved object
    {
#ifdef POLYML32IN64
        // In 32-in-64 code-area tombstones encode an offset from the global
        // code base (shifted), not a direct pointer.
        MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1);
        PolyObject *newp;
        if (space->isCode)
            newp = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
        else newp = obj->GetForwardingPtr();
#else
        PolyObject *newp = obj->GetForwardingPtr();
#endif
        ASSERT (newp->ContainsNormalLengthWord());
        return newp;
    }
    ASSERT (obj->ContainsNormalLengthWord()); // object is not moved
    return obj;
}
// Fix up addresses in the code area. Unlike ScanAddressesInRegion this updates
// cells that have been moved. We need to do that because we may still have
// return addresses into those cells and we don't move return addresses. We
// do want the code to see updated constant addresses.
// Fix up addresses in the code area.  Unlike ScanAddressesInRegion this
// also updates cells that have themselves been moved, because return
// addresses may still point into the old cells and are not relocated;
// the old code must therefore see updated constant addresses too.
void SaveFixupAddress::ScanCodeSpace(CodeSpace *space)
{
    for (PolyWord *pt = space->bottom; pt < space->top; )
    {
        pt++;
        PolyObject *obj = (PolyObject*)pt;
#ifdef POLYML32IN64
        // Follow the forwarding chain by hand: code-area tombstones encode
        // an offset from globalCodeBase rather than a direct pointer.
        PolyObject *dest = obj;
        while (dest->ContainsForwardingPtr())
        {
            MemSpace *space = gMem.SpaceForObjectAddress(dest);
            if (space->isCode)
                dest = (PolyObject*)(globalCodeBase + ((dest->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
            else dest = dest->GetForwardingPtr();
        }
#else
        PolyObject *dest = obj->FollowForwardingChain();
#endif
        // Use the destination's length word: the moved-from cell's own
        // length word is a tombstone.
        POLYUNSIGNED length = dest->Length();
        if (length != 0)
            ScanAddressesInObject(obj, dest->LengthWord());
        pt += length;
    }
}
// Called by the root thread to actually save the state and write the file.
// Called by the root thread, with all ML threads stopped, to copy the live
// data into export segments and write the saved-state file.  On failure it
// sets errorMessage/errCode and returns; on success it records the new file
// in the hierarchy table.
void SaveRequest::Perform()
{
    if (debugOptions & DEBUG_SAVING)
        Log("SAVE: Beginning saving state.\n");
    // Check that we aren't overwriting our own parent.
    for (unsigned q = 0; q < newHierarchy-1; q++) {
        if (sameFile(hierarchyTable[q]->fileName, fileName))
        {
            errorMessage = "File being saved is used as a parent of this file";
            errCode = 0;
            if (debugOptions & DEBUG_SAVING)
                Log("SAVE: File being saved is used as a parent of this file.\n");
            return;
        }
    }
    SaveStateExport exports;
    // Open the file. This could quite reasonably fail if the path is wrong.
    exports.exportFile = _tfopen(fileName, _T("wb"));
    if (exports.exportFile == NULL)
    {
        errorMessage = "Cannot open save file";
        errCode = ERRORNUMBER;
        if (debugOptions & DEBUG_SAVING)
            Log("SAVE: Cannot open save file.\n");
        return;
    }
    // Scan over the permanent mutable area copying all reachable data that is
    // not in a lower hierarchy into new permanent segments.
    CopyScan copyScan(newHierarchy);
    copyScan.initialise(false);
    bool success = true;
    try {
        for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
        {
            PermanentMemSpace *space = *i;
            if (space->isMutable && !space->noOverwrite && !space->byteOnly)
            {
                if (debugOptions & DEBUG_SAVING)
                    Log("SAVE: Scanning permanent mutable area %p allocated at %p size %lu\n",
                        space, space->bottom, space->spaceSize());
                copyScan.ScanAddressesInRegion(space->bottom, space->top);
            }
        }
    }
    catch (MemoryException &)
    {
        // Defer reporting: the spaces promoted so far must still be dealt with.
        success = false;
        if (debugOptions & DEBUG_SAVING)
            Log("SAVE: Scan of permanent mutable area raised memory exception.\n");
    }
    // Copy the areas into the export object. Make sufficient space for
    // the largest possible number of entries.
    exports.memTable = new memoryTableEntry[gMem.eSpaces.size()+gMem.pSpaces.size()+1];
    unsigned memTableCount = 0;
    // Permanent spaces at higher level. These have to have entries although
    // only the mutable entries will be written.
    for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
    {
        PermanentMemSpace *space = *i;
        if (space->hierarchy < newHierarchy)
        {
            memoryTableEntry *entry = &exports.memTable[memTableCount++];
            entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
            entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
            entry->mtIndex = space->index;
            entry->mtFlags = 0;
            if (space->isMutable)
            {
                entry->mtFlags |= MTF_WRITEABLE;
                if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE;
                if (space->byteOnly) entry->mtFlags |= MTF_BYTES;
            }
            if (space->isCode)
                entry->mtFlags |= MTF_EXECUTABLE;
        }
    }
    unsigned permanentEntries = memTableCount; // Remember where new entries start.
    // Newly created spaces.
    for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++)
    {
        memoryTableEntry *entry = &exports.memTable[memTableCount++];
        PermanentMemSpace *space = *i;
        entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
        entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
        entry->mtIndex = space->index;
        entry->mtFlags = 0;
        if (space->isMutable)
        {
            entry->mtFlags |= MTF_WRITEABLE;
            if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE;
            if (space->byteOnly) entry->mtFlags |= MTF_BYTES;
        }
        if (space->isCode)
            entry->mtFlags |= MTF_EXECUTABLE;
    }
    exports.memTableEntries = memTableCount;
    if (debugOptions & DEBUG_SAVING)
        Log("SAVE: Updating references to moved objects.\n");
    // Update references to moved objects.
    SaveFixupAddress fixup;
    for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
    {
        LocalMemSpace *space = *i;
        fixup.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr);
        fixup.ScanAddressesInRegion(space->upperAllocPtr, space->top);
    }
    for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
        fixup.ScanCodeSpace(*i);
    GCModules(&fixup);
    // Restore the length words in the code areas.
    // Although we've updated any pointers to the start of the code we could have return addresses
    // pointing to the original code. GCModules updates the stack but doesn't update return addresses.
    for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
    {
        CodeSpace *space = *i;
        for (PolyWord *pt = space->bottom; pt < space->top; )
        {
            pt++;
            PolyObject *obj = (PolyObject*)pt;
            if (obj->ContainsForwardingPtr())
            {
#ifdef POLYML32IN64
                // Code-area tombstones encode an offset from globalCodeBase.
                PolyObject *forwardedTo = obj;
                while (forwardedTo->ContainsForwardingPtr())
                    forwardedTo = (PolyObject*)(globalCodeBase + ((forwardedTo->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
#else
                PolyObject *forwardedTo = obj->FollowForwardingChain();
#endif
                POLYUNSIGNED lengthWord = forwardedTo->LengthWord();
                space->writeAble(obj)->SetLengthWord(lengthWord);
            }
            pt += obj->Length();
        }
    }
    // Update the global memory space table. Old segments at the same level
    // or lower are removed. The new segments become permanent.
    // Try to promote the spaces even if we've had a failure because export
    // spaces are deleted in ~CopyScan and we may have already copied
    // some objects there.
    if (debugOptions & DEBUG_SAVING)
        Log("SAVE: Promoting export spaces to permanent spaces.\n");
    if (! gMem.PromoteExportSpaces(newHierarchy) || ! success)
    {
        errorMessage = "Out of Memory";
        errCode = NOMEMORY;
        if (debugOptions & DEBUG_SAVING)
            Log("SAVE: Unable to promote export spaces.\n");
        return;
    }
    // Remove any deeper entries from the hierarchy table.
    while (hierarchyDepth > newHierarchy-1)
    {
        hierarchyDepth--;
        delete(hierarchyTable[hierarchyDepth]);
        hierarchyTable[hierarchyDepth] = 0;
    }
    if (debugOptions & DEBUG_SAVING)
        Log("SAVE: Writing out data.\n");
    // Write out the file header.
    SavedStateHeader saveHeader;
    memset(&saveHeader, 0, sizeof(saveHeader));
    saveHeader.headerLength = sizeof(saveHeader);
    memcpy(saveHeader.headerSignature,
        SAVEDSTATESIGNATURE, sizeof(saveHeader.headerSignature));
    saveHeader.headerVersion = SAVEDSTATEVERSION;
    saveHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr);
    if (newHierarchy == 1)
        saveHeader.parentTimeStamp = exportTimeStamp;
    else
    {
        saveHeader.parentTimeStamp = hierarchyTable[newHierarchy-2]->timeStamp;
        saveHeader.parentNameEntry = sizeof(TCHAR); // Always the first entry.
    }
    saveHeader.timeStamp = getBuildTime();
    saveHeader.segmentDescrCount = exports.memTableEntries; // One segment for each space.
#ifdef POLYML32IN64
    saveHeader.originalBaseAddr = globalHeapBase;
#endif
    // Write out the header.  The file offsets in it are not yet known;
    // it is rewritten with the final values at the end.
    fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile);
    // We need a segment header for each permanent area whether it is
    // actually in this file or not.
    SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [exports.memTableEntries];
    for (unsigned j = 0; j < exports.memTableEntries; j++)
    {
        memoryTableEntry *entry = &exports.memTable[j];
        memset(&descrs[j], 0, sizeof(SavedStateSegmentDescr));
        descrs[j].relocationSize = sizeof(RelocationEntry);
        descrs[j].segmentIndex = (unsigned)entry->mtIndex;
        descrs[j].segmentSize = entry->mtLength; // Set this even if we don't write it.
        descrs[j].originalAddress = entry->mtOriginalAddr;
        if (entry->mtFlags & MTF_WRITEABLE)
        {
            descrs[j].segmentFlags |= SSF_WRITABLE;
            if (entry->mtFlags & MTF_NO_OVERWRITE)
                descrs[j].segmentFlags |= SSF_NOOVERWRITE;
            if (j < permanentEntries && (entry->mtFlags & MTF_NO_OVERWRITE) == 0)
                descrs[j].segmentFlags |= SSF_OVERWRITE;
            if (entry->mtFlags & MTF_BYTES)
                descrs[j].segmentFlags |= SSF_BYTES;
        }
        if (entry->mtFlags & MTF_EXECUTABLE)
            descrs[j].segmentFlags |= SSF_CODE;
    }
    // Write out temporarily. Will be overwritten at the end.
    saveHeader.segmentDescr = ftell(exports.exportFile);
    fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile);
    // Write out the relocations and the data.
    for (unsigned k = 1 /* Not IO area */; k < exports.memTableEntries; k++)
    {
        memoryTableEntry *entry = &exports.memTable[k];
        // Write out the contents if this is new or if it is a normal, overwritable
        // mutable area.
        if (k >= permanentEntries ||
            (entry->mtFlags & (MTF_WRITEABLE|MTF_NO_OVERWRITE)) == MTF_WRITEABLE)
        {
            descrs[k].relocations = ftell(exports.exportFile);
            // Have to write this out.
            exports.relocationCount = 0;
            // Create the relocation table.
            char *start = (char*)entry->mtOriginalAddr;
            char *end = start + entry->mtLength;
            for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; )
            {
                p++;
                PolyObject *obj = (PolyObject*)p;
                POLYUNSIGNED length = obj->Length();
                // Most relocations can be computed when the saved state is
                // loaded so we only write out the difficult ones: those that
                // occur within compiled code.
                // exports.relocateObject(obj);
                if (length != 0 && obj->IsCodeObject())
                    machineDependent->ScanConstantsWithinCode(obj, &exports);
                p += length;
            }
            descrs[k].relocationCount = exports.relocationCount;
            // Write out the data.
            descrs[k].segmentData = ftell(exports.exportFile);
            fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exports.exportFile);
        }
    }
    // If this is a child we need to write a string table containing the parent name.
    if (newHierarchy > 1)
    {
        saveHeader.stringTable = ftell(exports.exportFile);
        _fputtc(0, exports.exportFile); // First byte of string table is zero
        _fputts(hierarchyTable[newHierarchy-2]->fileName, exports.exportFile);
        _fputtc(0, exports.exportFile); // A terminating null.
        saveHeader.stringTableSize = (_tcslen(hierarchyTable[newHierarchy-2]->fileName) + 2)*sizeof(TCHAR);
    }
    // Rewrite the header and the segment tables now they're complete.
    fseek(exports.exportFile, 0, SEEK_SET);
    fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile);
    fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile);
    if (debugOptions & DEBUG_SAVING)
        Log("SAVE: Writing complete.\n");
    // Add an entry to the hierarchy table for this file.
    (void)AddHierarchyEntry(fileName, saveHeader.timeStamp);
    delete[](descrs);
    CheckMemory();
}
// Write a saved state file.
// RTS entry point: write a saved state file.  [depth] is zero for a
// top-level save; internally hierarchy levels are numbered from one.
POLYUNSIGNED PolySaveState(FirstArgument threadId, PolyWord fileName, PolyWord depth)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    try {
        TempString fileNameBuff(Poly_string_to_T_alloc(fileName));
        // The value of depth is zero for top-level save so we need to add one for hierarchy.
        unsigned newHierarchy = get_C_unsigned(taskData, depth) + 1;
        if (newHierarchy > hierarchyDepth + 1)
            raise_fail(taskData, "Depth must be no more than the current hierarchy plus one");
        // Request a full GC first. The main reason is to avoid running out of memory as a
        // result of repeated saves. Old export spaces are turned into local spaces and
        // the GC will delete them if they are completely empty
        FullGC(taskData);
        // The actual save runs on the root thread with all ML threads stopped.
        SaveRequest request(fileNameBuff, newHierarchy);
        processes->MakeRootRequest(taskData, &request);
        if (request.errorMessage)
            raise_syscall(taskData, request.errorMessage, request.errCode);
    }
    catch (...) {} // If an ML exception is raised
    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    // Always returns ML unit; failures are reported via the ML exception.
    return TAGGED(0).AsUnsigned();
}
/*
* Loading saved state files.
*/
// Main-thread request that loads either a single saved-state file or a
// whole hierarchy (a list of file names).
class StateLoader: public MainThreadRequest
{
public:
    StateLoader(bool isH, Handle files): MainThreadRequest(MTP_LOADSTATE),
        isHierarchy(isH), fileNameList(files), errorResult(0), errNumber(0) { }
    virtual void Perform(void);
    bool LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail);
    bool isHierarchy;       // True if fileNameList is an ML list of names.
    Handle fileNameList;    // ML string, or ML list of strings when isHierarchy.
    const char *errorResult; // Null on success, otherwise the failure text.
    // The fileName here is the last file loaded. As well as using it
    // to load the name can also be printed out at the end to identify the
    // particular file in the hierarchy that failed.
    AutoFree fileName;
    int errNumber;          // errno-style code accompanying errorResult.
};
// Called by the main thread once all the ML threads have stopped.
// Called by the main thread once all the ML threads have stopped.
// Extracts the (first) file name from the ML argument and starts the load;
// for a hierarchy the remaining names are passed along as the list tail.
void StateLoader::Perform(void)
{
    // Copy the first file name into the buffer.
    if (isHierarchy)
    {
        if (ML_Cons_Cell::IsNull(fileNameList->Word()))
        {
            errorResult = "Hierarchy list is empty";
            return;
        }
        ML_Cons_Cell *p = DEREFLISTHANDLE(fileNameList);
        fileName = Poly_string_to_T_alloc(p->h);
        if (fileName == NULL)
        {
            errorResult = "Insufficient memory";
            errNumber = NOMEMORY;
            return;
        }
        (void)LoadFile(true, 0, p->t);
    }
    else
    {
        fileName = Poly_string_to_T_alloc(fileNameList->Word());
        if (fileName == NULL)
        {
            errorResult = "Insufficient memory";
            errNumber = NOMEMORY;
            return;
        }
        // No further files: the tail is the ML nil value.
        (void)LoadFile(true, 0, TAGGED(0));
    }
}
// Scanner applied after loading: re-establishes external references and
// wipes the contents of other weak/volatile no-overwrite objects.
class ClearVolatile: public ScanAddress
{
public:
    ClearVolatile() {}
    virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
    virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord);
};
// Set the values of external references and clear the values of other weak byte refs.
// Set the values of external references and clear the values of other weak byte refs.
// Only mutable no-overwrite objects are touched; everything else is left as loaded.
void ClearVolatile::ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord)
{
    if (OBJ_IS_MUTABLE_OBJECT(lengthWord) && OBJ_IS_NO_OVERWRITE(lengthWord))
    {
        if (OBJ_IS_BYTE_OBJECT(lengthWord))
        {
            if (OBJ_IS_WEAKREF_OBJECT(lengthWord))
            {
                // Zero the first pointer-sized word, then let setEntryPoint
                // re-resolve the entry-point from its stored name.
                POLYUNSIGNED len = OBJ_OBJECT_LENGTH(lengthWord);
                if (len >= sizeof(uintptr_t) / sizeof(PolyWord))
                    *((uintptr_t*)base) = 0;
                setEntryPoint(base);
            }
        }
        else
        {
            // Clear volatile refs
            POLYUNSIGNED len = OBJ_OBJECT_LENGTH(lengthWord);
            for (POLYUNSIGNED i = 0; i < len; i++)
                base->Set(i, TAGGED(0));
        }
    }
}
// This is copied from the B-tree in MemMgr. It probably should be
// merged but will do for the moment. It's intended to reduce the
// cost of finding the segment for relocation.
// Base node of the address-lookup B-tree used to speed up finding the
// segment containing an address during relocation.  Copied from the
// B-tree in MemMgr; the two should probably be merged eventually.
class SpaceBTree
{
public:
    SpaceBTree(bool is, unsigned i = 0) : isLeaf(is), index(i) { }
    virtual ~SpaceBTree() {}
    bool isLeaf;    // Distinguishes leaves from interior nodes.
    unsigned index; // The index if this is a leaf
};
// A non-leaf node in the B-tree
// Each slot covers one value of the current address byte (256-way fan-out);
// a null slot means no segment covers that sub-range.
class SpaceBTreeTree : public SpaceBTree
{
public:
    SpaceBTreeTree();
    virtual ~SpaceBTreeTree();
    SpaceBTree *tree[256]; // Child nodes, indexed by the next address byte.
};
// Construct an empty internal node: every subtree slot starts out null.
SpaceBTreeTree::SpaceBTreeTree() : SpaceBTree(false)
{
    unsigned slot = 0;
    while (slot < 256)
        tree[slot++] = 0;
}
// Recursively free the subtrees.  Deleting a null entry is a no-op, so no
// guard is needed; subtrees are independent, so deletion order is irrelevant.
SpaceBTreeTree::~SpaceBTreeTree()
{
    unsigned slot = 256;
    while (slot != 0)
        delete tree[--slot];
}
// This class is used to relocate addresses in areas that have been loaded.
class LoadRelocate: public ScanAddress
{
public:
LoadRelocate(bool pcc = false): processCodeConstants(pcc), originalBaseAddr(0), descrs(0),
targetAddresses(0), nDescrs(0), spaceTree(0) {}
~LoadRelocate();
void RelocateObject(PolyObject *p);
virtual PolyObject *ScanObjectAddress(PolyObject *base) { ASSERT(0); return base; } // Not used
- virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code);
+ virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement);
void RelocateAddressAt(PolyWord *pt);
PolyObject *RelocateAddress(PolyObject *obj);
void AddTreeRange(SpaceBTree **t, unsigned index, uintptr_t startS, uintptr_t endS);
bool processCodeConstants;
PolyWord *originalBaseAddr;
SavedStateSegmentDescr *descrs;
PolyWord **targetAddresses;
unsigned nDescrs;
SpaceBTree *spaceTree;
intptr_t relativeOffset;
};
// Release the tables built during loading.  delete and delete[] accept null
// pointers, so no explicit guards are required.
LoadRelocate::~LoadRelocate()
{
    delete[] descrs;
    delete[] targetAddresses;
    delete spaceTree;
}
// Add an entry to the space B-tree.
// Records that original addresses in [startS, endS] belong to segment
// descriptor `index`.  Each recursion level consumes the top byte of the
// address (the addresses are shifted left by 8 on the way down); endS == 0
// denotes "to the end of the current sub-range".
void LoadRelocate::AddTreeRange(SpaceBTree **tt, unsigned index, uintptr_t startS, uintptr_t endS)
{
    // Create an internal node here if there isn't one yet.
    if (*tt == 0)
        *tt = new SpaceBTreeTree;
    ASSERT(!(*tt)->isLeaf);
    SpaceBTreeTree *t = (SpaceBTreeTree*)*tt;

    const unsigned shift = (sizeof(void*) - 1) * 8; // Takes the high-order byte
    uintptr_t r = startS >> shift;
    ASSERT(r < 256);
    // endS == 0 means the range reaches the end of this node's span; treat it
    // as one past the last slot.
    const uintptr_t s = endS == 0 ? 256 : endS >> shift;
    ASSERT(s >= r && s <= 256);
    if (r == s) // Wholly within this entry
        AddTreeRange(&(t->tree[r]), index, startS << 8, endS << 8);
    else
    {
        // Deal with any remainder at the start.
        if ((r << shift) != startS)
        {
            AddTreeRange(&(t->tree[r]), index, startS << 8, 0 /*End of range*/);
            r++;
        }
        // Whole entries: each becomes a leaf pointing at this descriptor.
        while (r < s)
        {
            ASSERT(t->tree[r] == 0);
            t->tree[r] = new SpaceBTree(true, index);
            r++;
        }
        // Remainder at the end.
        if ((s << shift) != endS)
            AddTreeRange(&(t->tree[r]), index, 0, endS << 8);
    }
}
// Update the addresses in a group of words.
// If the word at pt holds an address (i.e. is not a tagged integer),
// reinterpret it against the original base address, relocate it, and store
// the result through a writable alias of pt.
void LoadRelocate::RelocateAddressAt(PolyWord *pt)
{
    PolyWord value = *pt;
    if (value.IsTagged())
        return; // Tagged (immediate) values need no adjustment.
    *gMem.SpaceForAddress(pt)->writeAble(pt) = RelocateAddress(value.AsObjPtr(originalBaseAddr));
}
// Map an address in the saved state's original address space to the
// corresponding address in the newly loaded segments.
PolyObject *LoadRelocate::RelocateAddress(PolyObject *obj)
{
    // Which segment is this address in?
    // N.B. As with SpaceForAddress we need to subtract 1 to point to the length word.
    uintptr_t t = (uintptr_t)((PolyWord*)obj - 1);
    SpaceBTree *tr = spaceTree;
    // Each level of the tree is either a leaf or a vector of trees.
    unsigned j = sizeof(void *) * 8; // Bit position just above the next byte to consume.
    for (;;)
    {
        if (tr == 0) break;
        if (tr->isLeaf) {
            // It's in this segment: relocate it to the current position.
            unsigned i = tr->index;
            SavedStateSegmentDescr *descr = &descrs[i];
            PolyWord *newAddress = targetAddresses[descr->segmentIndex];
            ASSERT((char*)obj > descr->originalAddress &&
                (char*)obj <= (char*)descr->originalAddress + descr->segmentSize);
            ASSERT(newAddress != 0);
            // Keep the same offset within the segment, rebased on newAddress.
            byte *setAddress = (byte*)newAddress + ((char*)obj - (char*)descr->originalAddress);
            return (PolyObject*)setAddress;
        }
        j -= 8;
        tr = ((SpaceBTreeTree*)tr)->tree[(t >> j) & 0xff];
    }
    // This should never happen: every address in a loaded segment must be
    // covered by some descriptor's range.
    ASSERT(0);
    return 0;
}
// This is based on Exporter::relocateObject but does the reverse.
// It attempts to adjust all the addresses in the object when it has
// been read in.
// Fix: the body contained unresolved diff markers ("-"/"+" prefixed lines)
// for the GetConstSegmentForCode and ScanConstantsWithinCode calls.  Resolved
// to the new machine-dependent forms, matching the two-argument
// ScanConstantsWithinCode call already used in ModuleExport::exportStore.
void LoadRelocate::RelocateObject(PolyObject *p)
{
    if (p->IsByteObject())
    {
        // Byte data contains no addresses: nothing to do.
    }
    else if (p->IsCodeObject())
    {
        POLYUNSIGNED constCount;
        PolyWord *cp;
        ASSERT(! p->IsMutable() );
        machineDependent->GetConstSegmentForCode(p, cp, constCount);
        /* Now the constant area. */
        for (POLYUNSIGNED i = 0; i < constCount; i++) RelocateAddressAt(&(cp[i]));
        // Saved states and modules have relocation entries for constants in the code.
        // We can't use them when loading object files in 32-in-64 so have to process the
        // constants here.
        if (processCodeConstants)
            machineDependent->ScanConstantsWithinCode(p, this);
    }
    else if (p->IsClosureObject())
    {
        // The first word is the address of the code.
        POLYUNSIGNED length = p->Length();
        *(PolyObject**)p = RelocateAddress(*(PolyObject**)p);
        // The remaining words are ordinary values.
        for (POLYUNSIGNED i = sizeof(PolyObject*)/sizeof(PolyWord); i < length; i++)
            RelocateAddressAt(p->Offset(i));
    }
    else /* Ordinary objects, essentially tuples. */
    {
        POLYUNSIGNED length = p->Length();
        for (POLYUNSIGNED i = 0; i < length; i++) RelocateAddressAt(p->Offset(i));
    }
}
// Update addresses as constants within the code.
-void LoadRelocate::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code)
+void LoadRelocate::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement)
{
- PolyObject *p = GetConstantValue(addressOfConstant, code, originalBaseAddr);
+ PolyObject *p = GetConstantValue(addressOfConstant, code, displacement);
if (p != 0)
{
// Relative addresses are computed by adding the CURRENT address.
// We have to convert them into addresses in original space before we
// can relocate them.
if (code == PROCESS_RELOC_I386RELATIVE)
p = (PolyObject*)((PolyWord*)p + relativeOffset);
PolyObject *newValue = RelocateAddress(p);
SetConstantValue(addressOfConstant, newValue, code);
}
}
// Work around bug in Mac OS when reading into MAP_JIT memory.
// Reads `size` bytes from `stream` into `ptr`.  Returns 1 on success and
// 0 on failure, mirroring fread's nmemb-style result.
static size_t readData(void *ptr, size_t size, FILE *stream)
{
#ifndef MACOSX
    // Elsewhere a single fread straight into the destination is fine.
    return fread(ptr, size, 1, stream);
#else
    // On Mac OS, read through a small bounce buffer and copy into place,
    // since fread directly into MAP_JIT memory can fail.
    char chunk[1024];
    size_t done = 0;
    while (done < size)
    {
        size_t wanted = size - done;
        if (wanted > sizeof(chunk)) wanted = sizeof(chunk);
        if (fread(chunk, wanted, 1, stream) != 1)
            return 0;
        memcpy((char*)ptr + done, chunk, wanted);
        done += wanted;
    }
    return 1; // Succeeded
#endif
}
// Load a saved state file. Calls itself to handle parent files.
// isInitial is true only for the outermost call; requiredStamp is the time
// stamp this file must carry when loaded as a parent; tail is the remainder
// of the name list when loading a hierarchy (TAGGED(0) otherwise).
// Returns true on success; on failure sets errorResult (and errNumber for
// system errors) and returns false.
bool StateLoader::LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail)
{
    LoadRelocate relocate;
    // Keep a private copy of the name: fileName may be overwritten while
    // loading parents, but the hierarchy entry needs this file's own name.
    AutoFree thisFile(_tcsdup(fileName));

    AutoClose loadFile(_tfopen(fileName, _T("rb")));
    if ((FILE*)loadFile == NULL)
    {
        errorResult = "Cannot open load file";
        errNumber = ERRORNUMBER;
        return false;
    }

    SavedStateHeader header;
    // Read the header and check the signature.
    if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1)
    {
        errorResult = "Unable to load header";
        return false;
    }
    if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0)
    {
        errorResult = "File is not a saved state";
        return false;
    }
    if (header.headerVersion != SAVEDSTATEVERSION ||
        header.headerLength != sizeof(SavedStateHeader) ||
        header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
    {
        errorResult = "Unsupported version of saved state file";
        return false;
    }
    // Check that we have the required stamp before loading any children.
    // If a parent has been overwritten we could get a loop.
    if (! isInitial && header.timeStamp != requiredStamp)
    {
        // Time-stamps don't match.
        errorResult = "The parent for this saved state does not match or has been changed";
        return false;
    }
    // Have verified that this is a reasonable saved state file. If it isn't a
    // top-level file we have to load the parents first.
    if (header.parentNameEntry != 0)
    {
        if (isHierarchy)
        {
            // Take the file name from the list
            if (ML_Cons_Cell::IsNull(tail))
            {
                errorResult = "Missing parent name in argument list";
                return false;
            }
            ML_Cons_Cell *p = (ML_Cons_Cell *)tail.AsObjPtr();
            fileName = Poly_string_to_T_alloc(p->h);
            if (fileName == NULL)
            {
                errorResult = "Insufficient memory";
                errNumber = NOMEMORY;
                return false;
            }
            if (! LoadFile(false, header.parentTimeStamp, p->t))
                return false;
        }
        else
        {
            // Not a hierarchy: read the parent's name out of the file's own
            // string table, reusing the fileName buffer.
            size_t toRead = header.stringTableSize-header.parentNameEntry;
            size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR));
            // Always allow space for null terminator
            size_t roundedBytes = (elems + 1) * sizeof(TCHAR);
            TCHAR *newFileName = (TCHAR *)realloc(fileName, roundedBytes);
            if (newFileName == NULL)
            {
                errorResult = "Insufficient memory";
                errNumber = NOMEMORY;
                return false;
            }
            fileName = newFileName;
            if (header.parentNameEntry >= header.stringTableSize /* Bad entry */ ||
                fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 ||
                fread(fileName, 1, toRead, loadFile) != toRead)
            {
                errorResult = "Unable to read parent file name";
                return false;
            }
            fileName[elems] = 0; // Should already be null-terminated, but just in case.

            if (! LoadFile(false, header.parentTimeStamp, TAGGED(0)))
                return false;
        }

        ASSERT(hierarchyDepth > 0 && hierarchyTable[hierarchyDepth-1] != 0);
    }
    else // Top-level file
    {
        if (isHierarchy && ! ML_Cons_Cell::IsNull(tail))
        {
            // There should be no further file names if this is really the top.
            errorResult = "Too many file names in the list";
            return false;
        }
        if (header.parentTimeStamp != exportTimeStamp)
        {
            // Time-stamp does not match executable.
            errorResult =
                "Saved state was exported from a different executable or the executable has changed";
            return false;
        }
        // Any existing spaces at this level or greater must be turned
        // into local spaces. We may have references from the stack to objects that
        // have previously been imported but otherwise these spaces are no longer
        // needed.
        gMem.DemoteImportSpaces();
        // Clean out the hierarchy table.
        for (unsigned h = 0; h < hierarchyDepth; h++)
        {
            delete(hierarchyTable[h]);
            hierarchyTable[h] = 0;
        }
        hierarchyDepth = 0;
    }

    // Now have a valid, matching saved state.
    // Load the segment descriptors.
    relocate.nDescrs = header.segmentDescrCount;
    relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs];
    relocate.originalBaseAddr = (PolyWord*)header.originalBaseAddr;

    if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 ||
        fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs)
    {
        errorResult = "Unable to read segment descriptors";
        return false;
    }
    {
        // Build the lookup tree over original address ranges and allocate
        // the table of new base addresses indexed by segment index.
        unsigned maxIndex = 0;
        for (unsigned i = 0; i < relocate.nDescrs; i++)
        {
            if (relocate.descrs[i].segmentIndex > maxIndex)
                maxIndex = relocate.descrs[i].segmentIndex;
            relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress,
                (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize-1));
        }
        relocate.targetAddresses = new PolyWord*[maxIndex+1];
        for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0;
    }

    // Read in and create the new segments first. If we have problems,
    // in particular if we have run out of memory, then it's easier to recover.
    for (unsigned i = 0; i < relocate.nDescrs; i++)
    {
        SavedStateSegmentDescr *descr = &relocate.descrs[i];
        MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
        if (space != NULL) relocate.targetAddresses[descr->segmentIndex] = space->bottom;

        if (descr->segmentData == 0)
        { // No data - just an entry in the index.
            if (space == NULL/* ||
                descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/)
            {
                errorResult = "Mismatch for existing memory space";
                return false;
            }
        }
        else if ((descr->segmentFlags & SSF_OVERWRITE) == 0)
        { // New segment.
            if (space != NULL)
            {
                errorResult = "Segment already exists";
                return false;
            }
            // Allocate memory for the new segment.
            unsigned mFlags =
                (descr->segmentFlags & SSF_WRITABLE ? MTF_WRITEABLE : 0) |
                (descr->segmentFlags & SSF_NOOVERWRITE ? MTF_NO_OVERWRITE : 0) |
                (descr->segmentFlags & SSF_BYTES ? MTF_BYTES : 0) |
                (descr->segmentFlags & SSF_CODE ? MTF_EXECUTABLE : 0);
            PermanentMemSpace *newSpace =
                gMem.AllocateNewPermanentSpace(descr->segmentSize, mFlags, descr->segmentIndex, hierarchyDepth + 1);
            if (newSpace == 0)
            {
                errorResult = "Unable to allocate memory";
                return false;
            }

            PolyWord *mem = newSpace->bottom;
            PolyWord* writeAble = newSpace->writeAble(mem);
            if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0)
            {
                errorResult = "Unable to seek segment";
                return false;
            }
            if (readData(writeAble, descr->segmentSize, loadFile) != 1)
            {
                errorResult = "Unable to read segment";
                return false;
            }
            // Fill unused space to the top of the area.
            gMem.FillUnusedSpace(writeAble + descr->segmentSize/sizeof(PolyWord),
                newSpace->spaceSize() - descr->segmentSize/sizeof(PolyWord));
            // Leave it writable until we've done the relocations.
            relocate.targetAddresses[descr->segmentIndex] = mem;
            if (newSpace->noOverwrite)
            {
                // Reset volatile state (weak refs etc) in the new area.
                ClearVolatile cwbr;
                cwbr.ScanAddressesInRegion(newSpace->bottom, newSpace->topPointer);
            }
        }
    }

    // Now read in the mutable overwrites and relocate.
    for (unsigned j = 0; j < relocate.nDescrs; j++)
    {
        SavedStateSegmentDescr *descr = &relocate.descrs[j];
        MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
        ASSERT(space != NULL); // We should have created it.
        if (descr->segmentFlags & SSF_OVERWRITE)
        {
            if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 ||
                fread(space->bottom, descr->segmentSize, 1, loadFile) != 1)
            {
                errorResult = "Unable to read segment";
                return false;
            }
        }
        // Relocation.
        if (descr->segmentData != 0)
        {
            // Adjust the addresses in the loaded segment.
            for (PolyWord *p = space->bottom; p < space->top; )
            {
                p++; // Step over the length word to the object itself.
                PolyObject *obj = (PolyObject*)p;
                POLYUNSIGNED length = obj->Length();
                relocate.RelocateObject(obj);
                p += length;
            }
        }
        // Process explicit relocations.
        // If we get errors just skip the error and continue rather than leave
        // everything in an unstable state.
        if (descr->relocations)
        {
            if (fseek(loadFile, descr->relocations, SEEK_SET) != 0)
            {
                errorResult = "Unable to read relocation segment";
                return false;
            }
            for (unsigned k = 0; k < descr->relocationCount; k++)
            {
                RelocationEntry reloc;
                if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1)
                {
                    errorResult = "Unable to read relocation segment";
                    return false;
                }
                MemSpace *toSpace = gMem.SpaceForIndex(reloc.targetSegment);
                if (toSpace == NULL)
                {
                    errorResult = "Unknown space reference in relocation";
                    continue;
                }
                byte *setAddress = (byte*)space->bottom + reloc.relocAddress;
                byte *targetAddress = (byte*)toSpace->bottom + reloc.targetAddress;
                if (setAddress >= (byte*)space->top || targetAddress >= (byte*)toSpace->top)
                {
                    errorResult = "Bad relocation";
                    continue;
                }
                ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind);
            }
        }
    }

    // Set the final permissions.
    for (unsigned j = 0; j < relocate.nDescrs; j++)
    {
        SavedStateSegmentDescr *descr = &relocate.descrs[j];
        if (descr->segmentData != 0)
        {
            PermanentMemSpace* space = gMem.SpaceForIndex(descr->segmentIndex);
            gMem.CompletePermanentSpaceAllocation(space);
        }
    }

    // Add an entry to the hierarchy table for this file.
    if (! AddHierarchyEntry(thisFile, header.timeStamp))
        return false;

    return true; // Succeeded
}
// Load a saved state or a hierarchy.
// hFileList is a list if this is a hierarchy and a single name if it is not.
// Runs the load on the main thread and converts any recorded error into an
// ML exception on the calling thread.
// Fix: the malloc result was passed to sprintf without a NULL check (the
// same pattern is checked in ShowParent); on allocation failure we now fall
// back to raising with the bare error message.
static void LoadState(TaskData *taskData, bool isHierarchy, Handle hFileList)
{
    StateLoader loader(isHierarchy, hFileList);
    // Request the main thread to do the load. This may set the error string if it failed.
    processes->MakeRootRequest(taskData, &loader);

    if (loader.errorResult != 0)
    {
        if (loader.errNumber == 0)
            raise_fail(taskData, loader.errorResult);
        else
        {
            AutoFree buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1));
            if (buff == NULL) // Can't build the full message: raise with what we have.
                raise_syscall(taskData, loader.errorResult, loader.errNumber);
#if (defined(_WIN32) && defined(UNICODE))
            sprintf(buff, "%s: %S", loader.errorResult, (TCHAR *)loader.fileName);
#else
            sprintf(buff, "%s: %s", loader.errorResult, (TCHAR *)loader.fileName);
#endif
            raise_syscall(taskData, buff, loader.errNumber);
        }
    }
}
// Load a saved state file and any ancestors.
// RTS entry point: arg is the file name.  The Pre/PostRTSCall pair and the
// save-vector mark/reset bracket the call and must stay in this order.
POLYUNSIGNED PolyLoadState(FirstArgument threadId, PolyWord arg)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    Handle pushedArg = taskData->saveVec.push(arg);

    try {
        LoadState(taskData, false, pushedArg);
    }
    catch (...) {} // If an ML exception is raised

    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    return TAGGED(0).AsUnsigned();
}
// Load hierarchy. This provides a complete list of children and parents.
// RTS entry point: arg is the list of file names, child first.
POLYUNSIGNED PolyLoadHierarchy(FirstArgument threadId, PolyWord arg)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    Handle pushedArg = taskData->saveVec.push(arg);

    try {
        LoadState(taskData, true, pushedArg);
    }
    catch (...) {} // If an ML exception is raised

    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    return TAGGED(0).AsUnsigned();
}
/*
* Additional functions to provide information or change saved-state files.
*/
// These functions do not affect the global state so can be executed by
// the ML threads directly.
static Handle ShowHierarchy(TaskData *taskData)
// Return the list of files in the hierarchy.
{
    Handle saved = taskData->saveVec.mark();
    Handle list = SAVE(ListNull);

    // Process this in reverse order.  Consing from the deepest entry
    // backwards produces the list in hierarchy order.
    for (unsigned i = hierarchyDepth; i > 0; i--)
    {
        Handle value = SAVE(C_string_to_Poly(taskData, hierarchyTable[i-1]->fileName));
        Handle next  = alloc_and_save(taskData, sizeof(ML_Cons_Cell)/sizeof(PolyWord));
        DEREFLISTHANDLE(next)->h = value->Word();
        DEREFLISTHANDLE(next)->t = list->Word();
        // Reset the save vector each iteration; only the accumulated list
        // needs to stay rooted, so it is re-pushed after the reset.
        taskData->saveVec.reset(saved);
        list = SAVE(next->Word());
    }
    return list;
}
// Show the hierarchy.
// RTS entry point: returns the ML list of hierarchy file names, or a tagged
// zero if the call failed.
POLYUNSIGNED PolyShowHierarchy(FirstArgument threadId)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    Handle result = 0;

    try {
        result = ShowHierarchy(taskData);
    }
    catch (...) {} // If an ML exception is raised

    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    if (result == 0) return TAGGED(0).AsUnsigned();
    else return result->Word().AsUnsigned();
}
// Change the name of the immediate parent stored in a child.
// Validates the child file's header, appends a fresh string table holding
// the new parent name at the end of the file, and rewrites the header to
// point at it.  Raises an ML exception on any failure.
// Fix: the malloc result was passed to sprintf without a NULL check;
// ShowParent below guards the identical allocation, so this now does too.
static void RenameParent(TaskData *taskData, PolyWord childName, PolyWord parentName)
// Change the name of the immediate parent stored in a child
{
    // The name of the file to modify.
    AutoFree fileNameBuff(Poly_string_to_T_alloc(childName));
    if (fileNameBuff == NULL)
        raise_syscall(taskData, "Insufficient memory", NOMEMORY);
    // The new parent name to insert.
    AutoFree parentNameBuff(Poly_string_to_T_alloc(parentName));
    if (parentNameBuff == NULL)
        raise_syscall(taskData, "Insufficient memory", NOMEMORY);

    AutoClose loadFile(_tfopen(fileNameBuff, _T("r+b"))); // Open for reading and writing
    if ((FILE*)loadFile == NULL)
    {
        AutoFree buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1));
        if (buff == NULL) // sprintf into a null buffer is undefined behaviour.
            raise_syscall(taskData, "Insufficient memory", NOMEMORY);
#if (defined(_WIN32) && defined(UNICODE))
        sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff);
#else
        sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff);
#endif
        raise_syscall(taskData, buff, ERRORNUMBER);
    }

    SavedStateHeader header;
    // Read the header and check the signature.
    if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1)
        raise_fail(taskData, "Unable to load header");
    if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0)
        raise_fail(taskData, "File is not a saved state");
    if (header.headerVersion != SAVEDSTATEVERSION ||
        header.headerLength != sizeof(SavedStateHeader) ||
        header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
    {
        raise_fail(taskData, "Unsupported version of saved state file");
    }
    // Does this actually have a parent?
    if (header.parentNameEntry == 0)
        raise_fail(taskData, "File does not have a parent");

    // At the moment the only entry in the string table is the parent
    // name so we can simply write a new one on the end of the file.
    // This makes the file grow slightly each time but it shouldn't be
    // significant.
    fseek(loadFile, 0, SEEK_END);
    header.stringTable = ftell(loadFile); // Remember where this is
    _fputtc(0, loadFile); // First byte of string table is zero
    _fputts(parentNameBuff, loadFile);
    _fputtc(0, loadFile); // A terminating null.
    header.stringTableSize = (_tcslen(parentNameBuff) + 2)*sizeof(TCHAR);

    // Now rewind and write the header with the revised string table.
    fseek(loadFile, 0, SEEK_SET);
    fwrite(&header, sizeof(header), 1, loadFile);
}
// RTS entry point: rewrite the parent name stored in a child saved state.
// childName is the file to modify; parentName is the new parent path.
POLYUNSIGNED PolyRenameParent(FirstArgument threadId, PolyWord childName, PolyWord parentName)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();

    try {
        RenameParent(taskData, childName, parentName);
    }
    catch (...) {} // If an ML exception is raised

    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    return TAGGED(0).AsUnsigned();
}
static Handle ShowParent(TaskData *taskData, Handle hFileName)
// Return the name of the immediate parent stored in a child
// as SOME name, or NONE if the file records no parent.
{
    AutoFree fileNameBuff(Poly_string_to_T_alloc(hFileName->Word()));
    if (fileNameBuff == NULL)
        raise_syscall(taskData, "Insufficient memory", NOMEMORY);

    AutoClose loadFile(_tfopen(fileNameBuff, _T("rb")));
    if ((FILE*)loadFile == NULL)
    {
        AutoFree buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1));
        if (buff == NULL)
            raise_syscall(taskData, "Insufficient memory", NOMEMORY);
#if (defined(_WIN32) && defined(UNICODE))
        sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff);
#else
        sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff);
#endif
        raise_syscall(taskData, buff, ERRORNUMBER);
    }

    SavedStateHeader header;
    // Read the header and check the signature.
    if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1)
        raise_fail(taskData, "Unable to load header");
    if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0)
        raise_fail(taskData, "File is not a saved state");
    if (header.headerVersion != SAVEDSTATEVERSION ||
        header.headerLength != sizeof(SavedStateHeader) ||
        header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
    {
        raise_fail(taskData, "Unsupported version of saved state file");
    }

    // Does this have a parent?
    if (header.parentNameEntry != 0)
    {
        // Read the parent's name from the string table.
        size_t toRead = header.stringTableSize-header.parentNameEntry;
        size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR));
        // Always allow space for null terminator
        size_t roundedBytes = (elems + 1) * sizeof(TCHAR);
        AutoFree parentFileName((TCHAR *)malloc(roundedBytes));
        if (parentFileName == NULL)
            raise_syscall(taskData, "Insufficient memory", NOMEMORY);

        if (header.parentNameEntry >= header.stringTableSize /* Bad entry */ ||
            fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 ||
            fread(parentFileName, 1, toRead, loadFile) != toRead)
        {
            raise_fail(taskData, "Unable to read parent file name");
        }
        parentFileName[elems] = 0; // Should already be null-terminated, but just in case.

        // Convert the name into a Poly string and then build a "Some" value.
        // It's possible, although silly, to have the empty string as a parent name.
        Handle resVal = SAVE(C_string_to_Poly(taskData, parentFileName));
        Handle result = alloc_and_save(taskData, 1);
        DEREFHANDLE(result)->Set(0, resVal->Word());
        return result;
    }
    else return SAVE(NONE_VALUE);
}
// Return the name of the immediate parent stored in a child
// RTS entry point: arg is the child file name; returns SOME name / NONE,
// or a tagged zero if the call failed.
POLYUNSIGNED PolyShowParent(FirstArgument threadId, PolyWord arg)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    Handle pushedArg = taskData->saveVec.push(arg);
    Handle result = 0;

    try {
        result = ShowParent(taskData, pushedArg);
    }
    catch (...) {} // If an ML exception is raised

    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    if (result == 0) return TAGGED(0).AsUnsigned();
    else return result->Word().AsUnsigned();
}
// Module system
#define MODULESIGNATURE "POLYMODU"
#define MODULEVERSION   2

// On-disk header of a module file.  Mirrors SavedStateHeader.
typedef struct _moduleHeader
{
    // These entries are primarily to check that we have a valid
    // saved state file before we try to interpret anything else.
    char headerSignature[8];     // Should contain MODULESIGNATURE
    unsigned headerVersion;      // Should contain MODULEVERSION
    unsigned headerLength;       // Number of bytes in the header
    unsigned segmentDescrLength; // Number of bytes in a descriptor
    // These entries contain the real data.
    off_t segmentDescr;          // Position of segment descriptor table
    unsigned segmentDescrCount;  // Number of segment descriptors in the table
    time_t timeStamp;            // The time stamp for this file.
    time_t executableTimeStamp;  // The time stamp for the parent executable.
    // Root: the module's root object, as a segment index plus byte offset.
    uintptr_t rootSegment;
    POLYUNSIGNED rootOffset;
} ModuleHeader;
// Store a module
// Main-thread request: exports everything reachable from `root` to
// `fileName`, reporting failure through errorMessage/errCode.
class ModuleStorer: public MainThreadRequest
{
public:
    ModuleStorer(const TCHAR *file, Handle r):
        MainThreadRequest(MTP_STOREMODULE), fileName(file), root(r), errorMessage(0), errCode(0) {}
    virtual void Perform();
    const TCHAR *fileName;    // Destination file path.
    Handle root;              // Root object of the module.
    const char *errorMessage; // Null on success.
    int errCode;              // System error code when errorMessage is set.
};
// Exporter used for modules.  The hierarchy argument of 1 means data in the
// executable itself is excluded from the export.
class ModuleExport: public SaveStateExport
{
public:
    ModuleExport(): SaveStateExport(1/* Everything EXCEPT the executable. */) {}
    virtual void exportStore(void); // Write the data out.
};
void ModuleStorer::Perform()
{
ModuleExport exporter;
#if (defined(_WIN32) && defined(UNICODE))
exporter.exportFile = _wfopen(fileName, L"wb");
#else
exporter.exportFile = fopen(fileName, "wb");
#endif
if (exporter.exportFile == NULL)
{
errorMessage = "Cannot open export file";
errCode = ERRORNUMBER;
return;
}
// RunExport copies everything reachable from the root, except data from
// the executable because we've set the hierarchy to 1, using CopyScan.
// It builds the tables in the export data structure then calls exportStore
// to actually write the data.
if (! root->Word().IsDataPtr())
{
// If we have a completely empty module the list may be null.
// This needs to be dealt with at a higher level.
errorMessage = "Module root is not an address";
return;
}
exporter.RunExport(root->WordP());
errorMessage = exporter.errorMessage; // This will be null unless there's been an error.
}
// Write the module file: header, segment descriptor table, then each new
// segment's relocations and data.  File offsets are collected with ftell as
// sections are written, and the header/descriptors are rewritten at the end
// with the final values.
void ModuleExport::exportStore(void)
{
    // What we need to do here is implement the export in a similar way to e.g. PECOFFExport::exportStore
    // This is copied from SaveRequest::Perform and should be common code.
    ModuleHeader modHeader;
    memset(&modHeader, 0, sizeof(modHeader));
    modHeader.headerLength = sizeof(modHeader);
    memcpy(modHeader.headerSignature,
        MODULESIGNATURE, sizeof(modHeader.headerSignature));
    modHeader.headerVersion = MODULEVERSION;
    modHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr);
    modHeader.executableTimeStamp = exportTimeStamp;
    {
        // Record where the root object lives: its segment plus byte offset.
        unsigned rootArea = findArea(this->rootFunction);
        struct _memTableEntry *mt = &memTable[rootArea];
        modHeader.rootSegment = mt->mtIndex;
        modHeader.rootOffset = (POLYUNSIGNED)((char*)this->rootFunction - (char*)mt->mtOriginalAddr);
    }
    modHeader.timeStamp = getBuildTime();
    modHeader.segmentDescrCount = this->memTableEntries; // One segment for each space.

    // Write out the header.
    fwrite(&modHeader, sizeof(modHeader), 1, this->exportFile);

    SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [this->memTableEntries];
    // We need an entry in the descriptor tables for each segment in the executable because
    // we may have relocations that refer to addresses in it.
    for (unsigned j = 0; j < this->memTableEntries; j++)
    {
        SavedStateSegmentDescr *thisDescr = &descrs[j];
        memoryTableEntry *entry = &this->memTable[j];
        memset(thisDescr, 0, sizeof(SavedStateSegmentDescr));
        thisDescr->relocationSize = sizeof(RelocationEntry);
        thisDescr->segmentIndex = (unsigned)entry->mtIndex;
        thisDescr->segmentSize = entry->mtLength; // Set this even if we don't write it.
        thisDescr->originalAddress = entry->mtOriginalAddr;
        // Translate the memory-table flags into saved-state segment flags.
        if (entry->mtFlags & MTF_WRITEABLE)
        {
            thisDescr->segmentFlags |= SSF_WRITABLE;
            if (entry->mtFlags & MTF_NO_OVERWRITE)
                thisDescr->segmentFlags |= SSF_NOOVERWRITE;
            if ((entry->mtFlags & MTF_NO_OVERWRITE) == 0)
                thisDescr->segmentFlags |= SSF_OVERWRITE;
            if (entry->mtFlags & MTF_BYTES)
                thisDescr->segmentFlags |= SSF_BYTES;
        }
        if (entry->mtFlags & MTF_EXECUTABLE)
            thisDescr->segmentFlags |= SSF_CODE;
    }

    // Write out temporarily. Will be overwritten at the end.
    modHeader.segmentDescr = ftell(this->exportFile);
    fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, this->exportFile);

    // Write out the relocations and the data.
    for (unsigned k = 0; k < this->memTableEntries; k++)
    {
        SavedStateSegmentDescr *thisDescr = &descrs[k];
        memoryTableEntry *entry = &this->memTable[k];
        if (k >= newAreas) // Not permanent areas
        {
            thisDescr->relocations = ftell(this->exportFile);
            // Have to write this out.
            this->relocationCount = 0;
            // Create the relocation table.
            char *start = (char*)entry->mtOriginalAddr;
            char *end = start + entry->mtLength;
            for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; )
            {
                p++; // Step over the length word to the object itself.
                PolyObject *obj = (PolyObject*)p;
                POLYUNSIGNED length = obj->Length();
                // For saved states we don't include explicit relocations except
                // in code but it's easier if we do for modules.
                if (length != 0 && obj->IsCodeObject())
                    machineDependent->ScanConstantsWithinCode(obj, this);
                relocateObject(obj);
                p += length;
            }
            thisDescr->relocationCount = this->relocationCount;
            // Write out the data.
            thisDescr->segmentData = ftell(exportFile);
            fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exportFile);
        }
    }

    // Rewrite the header and the segment tables now they're complete.
    fseek(exportFile, 0, SEEK_SET);
    fwrite(&modHeader, sizeof(modHeader), 1, exportFile);
    fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, exportFile);

    delete[](descrs);
    fclose(exportFile); exportFile = NULL;
}
// Store a module
// RTS entry point: name is the destination file, contents the module root.
// Errors recorded by the storer are re-raised as ML exceptions.
POLYUNSIGNED PolyStoreModule(FirstArgument threadId, PolyWord name, PolyWord contents)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    Handle pushedContents = taskData->saveVec.push(contents);

    try {
        TempString fileName(name);
        ModuleStorer storer(fileName, pushedContents);
        // Exporting has to happen on the root thread with ML threads stopped.
        processes->MakeRootRequest(taskData, &storer);
        if (storer.errorMessage)
            raise_syscall(taskData, storer.errorMessage, storer.errCode);
    }
    catch (...) {} // If an ML exception is raised

    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    return TAGGED(0).AsUnsigned();
}
// Load a module.
// Main-thread request: loads the module in `fileName` and leaves the root
// object in rootHandle; failures are reported via errorResult/errNumber.
class ModuleLoader: public MainThreadRequest
{
public:
    ModuleLoader(TaskData *taskData, const TCHAR *file):
        MainThreadRequest(MTP_LOADMODULE), callerTaskData(taskData), fileName(file),
        errorResult(NULL), errNumber(0), rootHandle(0) {}
    virtual void Perform();
    TaskData *callerTaskData; // Task that issued the request.
    const TCHAR *fileName;    // Module file to load.
    const char *errorResult;  // Null on success.
    int errNumber;            // System error code when relevant.
    Handle rootHandle;        // Root object of the loaded module.
};
// Perform the load on the main thread.  Validates the module header,
// creates a memory segment for each segment descriptor, applies the
// explicit relocations and finally pushes the module's root object onto
// the caller's save vector.  On failure it sets errorResult (and
// errNumber where a system error code is available) and returns early.
void ModuleLoader::Perform()
{
// AutoClose guarantees the file is closed when this function returns.
AutoClose loadFile(_tfopen(fileName, _T("rb")));
if ((FILE*)loadFile == NULL)
{
errorResult = "Cannot open load file";
errNumber = ERRORNUMBER; // Record the system error for the caller.
return;
}
ModuleHeader header;
// Read the header and check the signature.
if (fread(&header, sizeof(ModuleHeader), 1, loadFile) != 1)
{
errorResult = "Unable to load header";
return;
}
if (strncmp(header.headerSignature, MODULESIGNATURE, sizeof(header.headerSignature)) != 0)
{
errorResult = "File is not a Poly/ML module";
return;
}
// Reject modules written with a different format version or with
// different structure layouts from this runtime.
if (header.headerVersion != MODULEVERSION ||
header.headerLength != sizeof(ModuleHeader) ||
header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
{
errorResult = "Unsupported version of module file";
return;
}
if (header.executableTimeStamp != exportTimeStamp)
{
// Time-stamp does not match executable.
errorResult =
"Module was exported from a different executable or the executable has changed";
return;
}
// Read the segment descriptor table into the relocation helper.
// NOTE(review): the arrays allocated with new[] below are presumably
// released by LoadRelocate's destructor - confirm.
LoadRelocate relocate;
relocate.nDescrs = header.segmentDescrCount;
relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs];
if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 ||
fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs)
{
errorResult = "Unable to read segment descriptors";
return;
}
// Build the segment-index -> base-address table, sized by the largest
// segment index present, with every entry initially null.
{
unsigned maxIndex = 0;
for (unsigned i = 0; i < relocate.nDescrs; i++)
if (relocate.descrs[i].segmentIndex > maxIndex)
maxIndex = relocate.descrs[i].segmentIndex;
relocate.targetAddresses = new PolyWord*[maxIndex+1];
for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0;
}
// Read in and create the new segments first. If we have problems,
// in particular if we have run out of memory, then it's easier to recover.
for (unsigned i = 0; i < relocate.nDescrs; i++)
{
SavedStateSegmentDescr *descr = &relocate.descrs[i];
MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
if (descr->segmentData == 0)
{ // No data - just an entry in the index.
if (space == NULL/* ||
descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/)
{
errorResult = "Mismatch for existing memory space";
return;
}
else relocate.targetAddresses[descr->segmentIndex] = space->bottom;
}
else
{ // New segment.
if (space != NULL)
{
errorResult = "Segment already exists";
return;
}
// Allocate memory for the new segment.
size_t actualSize = descr->segmentSize;
MemSpace *space; // NB: shadows the outer "space" (which is NULL on this path).
if (descr->segmentFlags & SSF_CODE)
{
CodeSpace *cSpace = gMem.NewCodeSpace(actualSize);
if (cSpace == 0)
{
errorResult = "Unable to allocate memory";
return;
}
space = cSpace;
// Mark the segment's extent; pad any slack up to the top of the space.
cSpace->firstFree = (PolyWord*)((byte*)space->bottom + descr->segmentSize);
if (cSpace->firstFree != cSpace->top)
gMem.FillUnusedSpace(cSpace->firstFree, cSpace->top - cSpace->firstFree);
}
else
{
LocalMemSpace *lSpace = gMem.NewLocalSpace(actualSize, descr->segmentFlags & SSF_WRITABLE);
if (lSpace == 0)
{
errorResult = "Unable to allocate memory";
return;
}
space = lSpace;
lSpace->lowerAllocPtr = (PolyWord*)((byte*)lSpace->bottom + descr->segmentSize);
}
if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0)
{
errorResult = "Unable to seek to segment";
return;
}
if (readData(space->bottom, descr->segmentSize, loadFile) != 1)
{
errorResult = "Unable to read segment";
return;
}
relocate.targetAddresses[descr->segmentIndex] = space->bottom;
if (space->isMutable && (descr->segmentFlags & SSF_BYTES) != 0)
{
// Clear "volatile" words in mutable byte segments before use.
ClearVolatile cwbr;
cwbr.ScanAddressesInRegion(space->bottom, (PolyWord*)((byte*)space->bottom + descr->segmentSize));
}
}
}
// Now deal with relocation.
for (unsigned j = 0; j < relocate.nDescrs; j++)
{
SavedStateSegmentDescr *descr = &relocate.descrs[j];
PolyWord *baseAddr = relocate.targetAddresses[descr->segmentIndex];
ASSERT(baseAddr != NULL); // We should have created it.
// Process explicit relocations.
// If we get errors just skip the error and continue rather than leave
// everything in an unstable state.
if (descr->relocations)
{
if (fseek(loadFile, descr->relocations, SEEK_SET) != 0)
errorResult = "Unable to read relocation segment";
for (unsigned k = 0; k < descr->relocationCount; k++)
{
RelocationEntry reloc;
// NOTE(review): if this fread fails "reloc" is used uninitialised
// below; the error is recorded but the loop deliberately carries on.
if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1)
errorResult = "Unable to read relocation segment";
byte *setAddress = (byte*)baseAddr + reloc.relocAddress;
byte *targetAddress = (byte*)relocate.targetAddresses[reloc.targetSegment] + reloc.targetAddress;
ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind);
}
}
}
// Get the root address. Push this to the caller's save vec. If we put the
// newly created areas into local memory we could get a GC as soon as we
// complete this root request.
{
PolyWord *baseAddr = relocate.targetAddresses[header.rootSegment];
rootHandle = callerTaskData->saveVec.push((PolyObject*)((byte*)baseAddr + header.rootOffset));
}
}
// Load a module on behalf of an RTS call.  Delegates the actual work to
// ModuleLoader via a main-thread request, then converts a recorded
// failure into an ML exception: Fail when there is no system error
// code, SysErr (with "message: filename") otherwise.
static Handle LoadModule(TaskData *taskData, Handle args)
{
TempString fileName(args->Word()); // Convert the ML string argument.
ModuleLoader loader(taskData, fileName);
processes->MakeRootRequest(taskData, &loader); // Runs loader.Perform() on the main thread.
if (loader.errorResult != 0)
{
if (loader.errNumber == 0)
raise_fail(taskData, loader.errorResult);
else
{
// Build "message: filename".  The "+ 2" covers ": "; the final "+ 1"
// is the terminating NUL.  NOTE(review): for the UNICODE case the
// _tcslen * sizeof(TCHAR) term assumes the narrow conversion of the
// file name fits in that many bytes - confirm.
AutoFree buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1));
#if (defined(_WIN32) && defined(UNICODE))
sprintf(buff, "%s: %S", loader.errorResult, loader.fileName);
#else
sprintf(buff, "%s: %s", loader.errorResult, loader.fileName);
#endif
raise_syscall(taskData, buff, loader.errNumber);
}
}
return loader.rootHandle;
}
// Load a module
// RTS entry point: load a module.  "arg" is the ML value naming the
// module file.  Returns the module root, or TAGGED(0) if the load
// raised an ML exception.
POLYUNSIGNED PolyLoadModule(FirstArgument threadId, PolyWord arg)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle mark = taskData->saveVec.mark();
    Handle pushedArg = taskData->saveVec.push(arg);
    Handle result = 0;
    try { result = LoadModule(taskData, pushedArg); }
    catch (...) {} // An ML exception leaves result == 0.
    taskData->saveVec.reset(mark);
    taskData->PostRTSCall();
    return result == 0 ? TAGGED(0).AsUnsigned() : result->Word().AsUnsigned();
}
// Initialise the permanent memory spaces from an exported object file
// description and return the root object to execute.  Called once at
// start-up.  Terminates the process via Exit() if the export's version
// or structure layout does not match this library.
// Fix: removed a duplicated "newSpace == 0" check in the POLYML32IN64
// path that repeated the test after newSpace had already been checked
// and dereferenced (dead code).
PolyObject *InitHeaderFromExport(struct _exportDescription *exports)
{
    // Check the structure sizes stored in the export structure match the versions
    // used in this library.
    if (exports->structLength != sizeof(exportDescription) ||
        exports->memTableSize != sizeof(memoryTableEntry) ||
        exports->rtsVersion < FIRST_supported_version ||
        exports->rtsVersion > LAST_supported_version)
    {
#if (FIRST_supported_version == LAST_supported_version)
        Exit("The exported object file has version %0.2f but this library supports %0.2f",
            ((float)exports->rtsVersion) / 100.0,
            ((float)FIRST_supported_version) / 100.0);
#else
        Exit("The exported object file has version %0.2f but this library supports %0.2f-%0.2f",
            ((float)exports->rtsVersion) / 100.0,
            ((float)FIRST_supported_version) / 100.0,
            ((float)LAST_supported_version) / 100.0);
#endif
    }
    // We could also check the RTS version and the architecture.
    exportTimeStamp = exports->timeStamp; // Needed for load and save.
    memoryTableEntry *memTable = exports->memTable;
#ifdef POLYML32IN64
    // We need to copy this into the heap before beginning execution.
    // This is very like loading a saved state and the code should probably
    // be merged.
    LoadRelocate relocate(true);
    relocate.nDescrs = exports->memTableEntries;
    relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs];
    relocate.targetAddresses = new PolyWord*[exports->memTableEntries];
    relocate.originalBaseAddr = (PolyWord*)exports->originalBaseAddr;
    PolyObject *root = 0;
    for (unsigned i = 0; i < exports->memTableEntries; i++)
    {
        relocate.descrs[i].segmentIndex = memTable[i].mtIndex;
        relocate.descrs[i].originalAddress = memTable[i].mtOriginalAddr;
        relocate.descrs[i].segmentSize = memTable[i].mtLength;
        PermanentMemSpace *newSpace =
            gMem.AllocateNewPermanentSpace(memTable[i].mtLength, (unsigned)memTable[i].mtFlags, (unsigned)memTable[i].mtIndex);
        if (newSpace == 0)
            Exit("Unable to initialise a permanent memory space");
        // Copy the exported data into the new space and fill the slack.
        PolyWord *mem = newSpace->bottom;
        memcpy(newSpace->writeAble(mem), memTable[i].mtCurrentAddr, memTable[i].mtLength);
        PolyWord* unused = mem + memTable[i].mtLength / sizeof(PolyWord);
        gMem.FillUnusedSpace(newSpace->writeAble(unused),
            newSpace->spaceSize() - memTable[i].mtLength / sizeof(PolyWord));
        relocate.targetAddresses[i] = mem;
        relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress,
            (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize - 1));
        // Relocate the root function.
        if (exports->rootFunction >= memTable[i].mtCurrentAddr && exports->rootFunction < (char*)memTable[i].mtCurrentAddr + memTable[i].mtLength)
        {
            root = (PolyObject*)((char*)mem + ((char*)exports->rootFunction - (char*)memTable[i].mtCurrentAddr));
        }
    }
    // Now relocate the addresses
    for (unsigned j = 0; j < exports->memTableEntries; j++)
    {
        SavedStateSegmentDescr *descr = &relocate.descrs[j];
        MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
        // Any relative addresses have to be corrected by adding this.
        relocate.relativeOffset = (PolyWord*)descr->originalAddress - space->bottom;
        // Walk the objects in the space, relocating each in turn.
        for (PolyWord *p = space->bottom; p < space->top; )
        {
#ifdef POLYML32IN64
            if ((((uintptr_t)p) & 4) == 0)
            {
                // Skip any padding. The length word should be on an odd-word boundary.
                p++;
                continue;
            }
#endif
            p++;
            PolyObject *obj = (PolyObject*)p;
            POLYUNSIGNED length = obj->Length();
            relocate.RelocateObject(obj);
            p += length;
        }
    }
    // Set the final permissions.
    for (unsigned j = 0; j < exports->memTableEntries; j++)
    {
        PermanentMemSpace *space = gMem.SpaceForIndex(memTable[j].mtIndex);
        gMem.CompletePermanentSpaceAllocation(space);
    }
    return root;
#else
    for (unsigned i = 0; i < exports->memTableEntries; i++)
    {
        // Construct a new space for each of the entries.
        if (gMem.NewPermanentSpace(
                (PolyWord*)memTable[i].mtCurrentAddr,
                memTable[i].mtLength / sizeof(PolyWord), (unsigned)memTable[i].mtFlags,
                (unsigned)memTable[i].mtIndex) == 0)
            Exit("Unable to initialise a permanent memory space");
    }
    return (PolyObject *)exports->rootFunction;
#endif
}
// Return the system directory for modules. This is configured differently
// in Unix and in Windows.
// RTS entry point: return the directory in which modules are installed.
// Unix builds return the configured MODULEDIR; Windows builds read the
// installation path from the registry key created by the installer and
// append "Modules"; otherwise (or if the lookup fails) the empty string
// is returned.
// Fix: the Windows branch previously overwrote "result" with the empty
// string unconditionally AFTER a successful registry read, so the
// registry-derived path was always discarded.  The empty string is now
// only a fallback when no path was found.
POLYUNSIGNED PolyGetModuleDirectory(FirstArgument threadId)
{
    TaskData *taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle reset = taskData->saveVec.mark();
    Handle result = 0;
    try {
#if (defined(MODULEDIR))
        result = SAVE(C_string_to_Poly(taskData, MODULEDIR));
#elif (defined(_WIN32))
        {
            // This registry key is configured when Poly/ML is installed using the installer.
            // It gives the path to the Poly/ML installation directory. We return the
            // Modules subdirectory.
            HKEY hk;
            if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                    _T("SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\PolyML.exe"), 0,
                    KEY_QUERY_VALUE, &hk) == ERROR_SUCCESS)
            {
                DWORD valSize;
                // First query obtains the value size only.
                if (RegQueryValueEx(hk, _T("Path"), 0, NULL, NULL, &valSize) == ERROR_SUCCESS)
                {
#define MODULEDIR _T("Modules")
                    TempString buff((TCHAR*)malloc(valSize + (_tcslen(MODULEDIR) + 1) * sizeof(TCHAR)));
                    DWORD dwType;
                    if (RegQueryValueEx(hk, _T("Path"), 0, &dwType, (LPBYTE)(LPTSTR)buff, &valSize) == ERROR_SUCCESS)
                    {
                        // The registry entry should end with a backslash.
                        _tcscat(buff, MODULEDIR);
                        result = SAVE(C_string_to_Poly(taskData, buff));
                    }
                }
                RegCloseKey(hk);
            }
            // Fall back to the empty string only if nothing was found.
            if (result == 0)
                result = SAVE(C_string_to_Poly(taskData, ""));
        }
#else
        result = SAVE(C_string_to_Poly(taskData, ""));
#endif
    }
    catch (...) {} // If an ML exception is raised
    taskData->saveVec.reset(reset);
    taskData->PostRTSCall();
    if (result == 0) return TAGGED(0).AsUnsigned();
    else return result->Word().AsUnsigned();
}
// Entry-point table: maps the RTS call names used by the ML basis
// library onto the C implementations in this file.  The table is
// terminated by a NULL/NULL pair.
struct _entrypts savestateEPT[] =
{
{ "PolySaveState", (polyRTSFunction)&PolySaveState },
{ "PolyLoadState", (polyRTSFunction)&PolyLoadState },
{ "PolyShowHierarchy", (polyRTSFunction)&PolyShowHierarchy },
{ "PolyRenameParent", (polyRTSFunction)&PolyRenameParent },
{ "PolyShowParent", (polyRTSFunction)&PolyShowParent },
{ "PolyStoreModule", (polyRTSFunction)&PolyStoreModule },
{ "PolyLoadModule", (polyRTSFunction)&PolyLoadModule },
{ "PolyLoadHierarchy", (polyRTSFunction)&PolyLoadHierarchy },
{ "PolyGetModuleDirectory", (polyRTSFunction)&PolyGetModuleDirectory },
{ NULL, NULL } // End of list.
};
diff --git a/libpolyml/scanaddrs.cpp b/libpolyml/scanaddrs.cpp
index 275f9b5b..847ac88d 100644
--- a/libpolyml/scanaddrs.cpp
+++ b/libpolyml/scanaddrs.cpp
@@ -1,290 +1,292 @@
/*
Title: Address scanner
- Copyright (c) 2006-8, 2012, 2019 David C.J. Matthews
+ Copyright (c) 2006-8, 2012, 2019, 2021 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_ASSERT_H
#include <assert.h>
#define ASSERT(x) assert(x)
#else
#define ASSERT(x) 0
#endif
#include
#include "globals.h"
#include "scanaddrs.h"
#include "machine_dep.h"
#include "diagnostics.h"
#include "memmgr.h"
// Process the value at a given location and update it as necessary.
POLYUNSIGNED ScanAddress::ScanAddressAt(PolyWord *pt)
{
PolyWord val = *pt;
PolyWord newVal = val;
if (IS_INT(val) || val == PolyWord::FromUnsigned(0))
{
// We can get zeros in the constant area if we garbage collect
// while compiling some code. */
}
else
{
ASSERT(OBJ_IS_DATAPTR(val));
// Any sort of address
newVal = ScanObjectAddress(val.AsObjPtr());
}
if (newVal != val) // Only update if we need to.
*pt = newVal;
return 0;
}
// General purpose object processor, Processes all the addresses in an object.
// Handles the various kinds of object that may contain addresses.
void ScanAddress::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord)
{
do
{
ASSERT (OBJ_IS_LENGTH(lengthWord));
if (OBJ_IS_BYTE_OBJECT(lengthWord))
return; /* Nothing more to do */
POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord);
PolyWord *baseAddr = (PolyWord*)obj;
if (OBJ_IS_CODE_OBJECT(lengthWord))
{
// Scan constants within the code.
- machineDependent->ScanConstantsWithinCode(obj, obj, length, this);
-
+ machineDependent->ScanConstantsWithinCode(obj, length, this);
// Skip to the constants and get ready to scan them.
- obj->GetConstSegmentForCode(length, baseAddr, length);
+ machineDependent->GetConstSegmentForCode(obj, length, baseAddr, length);
// Adjust to the read-write area if necessary.
baseAddr = gMem.SpaceForAddress(baseAddr)->writeAble(baseAddr);
}
else if (OBJ_IS_CLOSURE_OBJECT(lengthWord))
{
// The first word is a code pointer so we need to treat it specially
// but it is possible it hasn't yet been set.
if ((*(uintptr_t*)baseAddr & 1) == 0)
{
POLYUNSIGNED lengthWord = ScanCodeAddressAt((PolyObject**)baseAddr); // N.B. This could side-effect *baseAddr
if (lengthWord != 0)
ScanAddressesInObject(*(PolyObject**)baseAddr, lengthWord);
}
baseAddr += sizeof(PolyObject*) / sizeof(PolyWord);
length -= sizeof(PolyObject*) / sizeof(PolyWord);
}
PolyWord *endWord = baseAddr + length;
// We want to minimise the actual recursion we perform so we try to
// use tail recursion if we can. We first scan from the end and
// remove any words that don't need recursion.
POLYUNSIGNED lastLengthWord = 0;
while (endWord != baseAddr)
{
PolyWord wordAt = endWord[-1];
if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0))
endWord--; // Don't need to look at this.
else if ((lastLengthWord = ScanAddressAt(endWord-1)) != 0)
// We need to process this one
break;
else endWord--; // We're not interested in this.
}
if (endWord == baseAddr)
return; // We've done everything.
// There is at least one word that needs to be processed, the
// one at endWord-1.
// Now process from the beginning forward to see if there are
// any words before this that need to be handled. This way we are more
// likely to handle the head of a list by recursion and the
// tail by looping (tail recursion).
while (baseAddr < endWord-1)
{
PolyWord wordAt = *baseAddr;
if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0))
baseAddr++; // Don't need to look at this.
else
{
POLYUNSIGNED lengthWord = ScanAddressAt(baseAddr);
if (lengthWord != 0)
{
wordAt = *baseAddr; // Reload because it may have been side-effected
// We really have to process this recursively.
ASSERT(wordAt.IsDataPtr());
ScanAddressesInObject(wordAt.AsObjPtr(), lengthWord);
baseAddr++;
}
else baseAddr++;
}
}
// Finally process the last word we found that has to be processed.
// Do this by looping rather than recursion.
PolyWord wordAt = *baseAddr; // Last word to do.
// This must be an address
ASSERT(wordAt.IsDataPtr());
obj = wordAt.AsObjPtr();
lengthWord = lastLengthWord;
} while(1);
}
void ScanAddress::ScanAddressesInRegion(PolyWord *region, PolyWord *end)
{
PolyWord *pt = region;
while (pt < end)
{
#ifdef POLYML32IN64
if ((((uintptr_t)pt) & 4) == 0)
{
// Skip any padding. The length word should be on an odd-word boundary.
pt++;
continue;
}
#endif
pt++; // Skip length word.
// pt actually points AT the object here.
PolyObject *obj = (PolyObject*)pt;
if (obj->ContainsForwardingPtr()) /* skip over moved object */
{
// We can now get multiple forwarding pointers as a result
// of applying ShareData repeatedly. Perhaps we should
// turn the forwarding pointers back into normal words in
// an extra pass.
obj = obj->FollowForwardingChain();
ASSERT(obj->ContainsNormalLengthWord());
pt += obj->Length();
}
else
{
ASSERT(obj->ContainsNormalLengthWord());
POLYUNSIGNED length = obj->Length();
if (pt+length > end)
Crash("Malformed object at %p - length %lu\n", pt, length);
if (length != 0)
ScanAddressesInObject(obj);
pt += length;
}
}
}
// Extract a constant from the code.
-PolyObject *ScanAddress::GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, PolyWord *base)
+PolyObject *ScanAddress::GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement)
{
switch (code)
{
case PROCESS_RELOC_DIRECT: // Absolute address
{
uintptr_t valu;
byte *pt = addressOfConstant;
if (pt[sizeof(uintptr_t)-1] & 0x80) valu = 0-1; else valu = 0;
for (unsigned i = sizeof(uintptr_t); i > 0; i--)
valu = (valu << 8) | pt[i-1];
if (valu == 0 || PolyWord::FromUnsigned((POLYUNSIGNED)valu).IsTagged())
return 0;
else return (PolyObject*)valu;
}
case PROCESS_RELOC_I386RELATIVE: // 32 bit relative address
{
POLYSIGNED disp;
byte *pt = addressOfConstant;
// Get the displacement. This is signed.
if (pt[3] & 0x80) disp = -1; else disp = 0; // Set the sign just in case.
for(unsigned i = 4; i > 0; i--) disp = (disp << 8) | pt[i-1];
- byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant
+ byte *absAddr = pt + disp + 4 + displacement; // The address is relative to AFTER the constant
return (PolyObject*)absAddr;
}
default:
ASSERT(false);
return 0;
}
}
// Store a constant value. Also used with a patch table when importing a saved heap which has
// been exported using the C exporter.
void ScanAddress::SetConstantValue(byte *addressOfConstant, PolyObject *p, ScanRelocationKind code)
{
MemSpace* space = gMem.SpaceForAddress(addressOfConstant);
byte* addressToWrite = space->writeAble(addressOfConstant);
switch (code)
{
case PROCESS_RELOC_DIRECT: // Absolute address
{
uintptr_t valu = (uintptr_t)p;
for (unsigned i = 0; i < sizeof(uintptr_t); i++)
{
addressToWrite[i] = (byte)(valu & 255);
valu >>= 8;
}
}
break;
case PROCESS_RELOC_I386RELATIVE: // 32 bit relative address
{
// This offset may be positive or negative
intptr_t newDisp = (byte*)p - addressOfConstant - 4;
#if (SIZEOF_VOIDP != 4)
ASSERT(newDisp < (intptr_t)0x80000000 && newDisp >= -(intptr_t)0x80000000);
#endif
for (unsigned i = 0; i < 4; i++) {
addressToWrite[i] = (byte)(newDisp & 0xff);
newDisp >>= 8;
}
+ // When we have shifted it 32-bits the result there should
+ // be no significant bits left.
+ ASSERT(newDisp == 0 || newDisp == -1);
}
break;
}
}
-void ScanAddress::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code)
+void ScanAddress::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement)
{
- PolyObject *p = GetConstantValue(addressOfConstant, code);
+ PolyObject *p = GetConstantValue(addressOfConstant, code, displacement);
if (p != 0)
{
PolyObject *oldValue = p;
// If this was a relative address we must have a code address.
if (code == PROCESS_RELOC_I386RELATIVE)
ScanCodeAddressAt(&p);
else p = ScanObjectAddress(p);
if (p != oldValue) // Update it if it has changed.
SetConstantValue(addressOfConstant, p, code);
}
}
void ScanAddress::ScanRuntimeWord(PolyWord *w)
{
if (w->IsTagged()) {} // Don't need to do anything
else {
ASSERT(w->IsDataPtr());
*w = ScanObjectAddress(w->AsObjPtr());
}
}
diff --git a/libpolyml/scanaddrs.h b/libpolyml/scanaddrs.h
index ff7c3482..15119418 100644
--- a/libpolyml/scanaddrs.h
+++ b/libpolyml/scanaddrs.h
@@ -1,102 +1,105 @@
/*
Title: scanaddrs.h - Scan addresses in objects
Copyright (c) 2006-8, 2012, 2015, 2018 David C.J. Matthews
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef SCANADDRS_H_INCLUDED
#define SCANADDRS_H_INCLUDED
#include "globals.h"
// Type of relocations.
typedef enum {
PROCESS_RELOC_DIRECT = 0, // 32 or 64 bit address of target
PROCESS_RELOC_I386RELATIVE // 32 or 64 bit relative address
} ScanRelocationKind;
class StackSpace;
class ScanAddress {
public:
virtual ~ScanAddress() {} // Keeps gcc happy
protected:
// Scan an address in the memory. "pt" always points into an object.
// It is not called with pt pointing at a C++ automatic variable.
// Tagged integers have already been filtered out.
// The result is the length word of the object to use if the object
// is to be processed recursively or 0 if it should not be.
// Default action - call ScanObjectAddress for the base object address of
// the address.
virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
// As for ScanAddressAt except that the value is a pointer to the first word in a closure object.
// In most cases we're just scanning the heap we don't need to do anything and we scan
// the code area separately.
virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { return 0; }
public:
// The fundamental overridable for this class. Takes the object address and returns
// the updated address. If nothing else is overridden everything eventually comes here.
virtual PolyObject *ScanObjectAddress(PolyObject *base) = 0;// { return base; }
typedef enum { STRENGTH_STRONG = 0, STRENGTH_WEAK = 1 } RtsStrength;
// Scan an address in the run-time system. This normally just applies ScanObjectAddress
// but if this is a weak reference it can set *pt to NULL
virtual void ScanRuntimeAddress(PolyObject **pt, RtsStrength weak)
{ *pt = ScanObjectAddress(*pt); }
// Scan a word in the run-time system. This is the preferred call for non-weak
// references and deals with the general case of a word.
void ScanRuntimeWord(PolyWord *w);
// Process a constant within the code.
// The default action is to call the DEFAULT ScanAddressAt NOT the virtual which means that it calls
// ScanObjectAddress for the base address of the object referred to.
- virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code);
+ // "displacement" is only used for relative addresses and is only non-zero when the code
+ // has been moved.
+ virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement=0);
// Scan the objects in the region and process their addresses. Applies ScanAddressesInObject
// to each of the objects. The "region" argument points AT the first length word.
// Typically used to scan or update addresses in the mutable area.
void ScanAddressesInRegion(PolyWord *region, PolyWord *endOfRegion);
// General object processor.
// If the object is a word object calls ScanAddressesAt for all the addresses.
//
// If the object is a code object calls ScanAddressesAt for the constant area and
// calls (indirectly) ScanConstant, and by default ScanObjectAddress for addresses within
// the code
//
// If the object is a stack calls ScanStackAddress which calls ScanObjectAddress for
// addresses within the code.
virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord);
void ScanAddressesInObject(PolyObject *base) { ScanAddressesInObject(base, base->LengthWord()); }
+ // Create a relocation but don't adjust the address. This is used for offsets from the
+ // code area to the constant area.
+ virtual void RelocateOnly(PolyObject* base, byte* addressOfConstant, ScanRelocationKind code) {}
+
// Extract a constant from the code.
-#ifdef POLYML32IN64
- static PolyObject *GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, PolyWord *base = globalHeapBase);
-#else
- static PolyObject *GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, PolyWord *base = 0);
-#endif
+ static PolyObject *GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement);
+
// Store a constant in the code.
static void SetConstantValue(byte *addressOfConstant, PolyObject *p, ScanRelocationKind code);
};
#endif
diff --git a/libpolyml/x86_dep.cpp b/libpolyml/x86_dep.cpp
index 3d6b0a22..83b50370 100644
--- a/libpolyml/x86_dep.cpp
+++ b/libpolyml/x86_dep.cpp
@@ -1,1468 +1,1538 @@
/*
Title: Machine dependent code for i386 and X64 under Windows and Unix
Copyright (c) 2000-7
Cambridge University Technical Services Limited
- Further work copyright David C. J. Matthews 2011-20
+ Further work copyright David C. J. Matthews 2011-21
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#ifdef HAVE_ASSERT_H
#include <assert.h>
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#if (defined(_WIN32))
#include
#include
#endif
#include "globals.h"
#include "run_time.h"
#include "diagnostics.h"
#include "processes.h"
#include "profiling.h"
#include "machine_dep.h"
#include "scanaddrs.h"
#include "memmgr.h"
#include "rtsentry.h"
#include "bytecode.h"
#include "sys.h" // Temporary
/**********************************************************************
*
* Register usage:
*
* %Reax: First argument to function. Result of function call.
* %Rebx: Second argument to function.
* %Recx: General register
* %Redx: Closure pointer in call.
* %Rebp: Points to memory used for extra registers
* %Resi: General register.
* %Redi: General register.
* %Resp: Stack pointer.
* The following apply only on the X64
* %R8: Third argument to function
* %R9: Fourth argument to function
* %R10: Fifth argument to function
* %R11: General register
* %R12: General register
* %R13: General register
* %R14: General register
* %R15: Memory allocation pointer
*
**********************************************************************/
#ifdef HOSTARCHITECTURE_X86_64
struct fpSaveArea {
double fpregister[7]; // Save area for xmm0-6
};
#else
// Structure of floating point save area.
// This is dictated by the hardware.
typedef byte fpregister[10];
struct fpSaveArea {
unsigned short cw;
unsigned short _unused0;
unsigned short sw;
unsigned short _unused1;
unsigned short tw;
unsigned short _unused2;
unsigned fip;
unsigned short fcs0;
unsigned short _unused3;
unsigned foo;
unsigned short fcs1;
unsigned short _unused4;
fpregister registers[8];
};
#endif
/* the amount of ML stack space to reserve for registers,
C exception handling etc. The compiler requires us to
reserve 2 stack-frames worth (2 * 20 words). We actually reserve
slightly more than this.
*/
#if (!defined(_WIN32) && !defined(HAVE_SIGALTSTACK))
// If we can't handle signals on a separate stack make sure there's space
// on the Poly stack.
#define OVERFLOW_STACK_SIZE (50+1024)
#else
#define OVERFLOW_STACK_SIZE 50
#endif
class X86TaskData;
// This is passed as the argument vector to X86AsmSwitchToPoly.
// The offsets are built into the assembly code and the code-generator.
// localMpointer and stackPtr are updated before control returns to C.
typedef struct _AssemblyArgs {
public:
PolyWord *localMpointer; // Allocation ptr + 1 word
stackItem *handlerRegister; // Current exception handler
PolyWord *localMbottom; // Base of memory + 1 word
stackItem *stackLimit; // Lower limit of stack
stackItem exceptionPacket; // Set if there is an exception
byte unusedRequestCode; // No longer used.
byte unusedFlag; // No longer used
byte returnReason; // Reason for returning from ML.
byte unusedRestore; // No longer used.
uintptr_t saveCStack; // Saved C stack frame.
PolyWord threadId; // My thread id. Saves having to call into RTS for it.
stackItem *stackPtr; // Current stack pointer
byte *enterInterpreter; // These are filled in with the functions.
byte *heapOverFlowCall;
byte *stackOverFlowCall;
byte *stackOverFlowCallEx;
byte *trapHandlerEntry;
// Saved registers, where applicable.
stackItem p_rax;
stackItem p_rbx;
stackItem p_rcx;
stackItem p_rdx;
stackItem p_rsi;
stackItem p_rdi;
#ifdef HOSTARCHITECTURE_X86_64
stackItem p_r8;
stackItem p_r9;
stackItem p_r10;
stackItem p_r11;
stackItem p_r12;
stackItem p_r13;
stackItem p_r14;
#endif
struct fpSaveArea p_fp;
} AssemblyArgs;
// These next few are temporarily added for the interpreter
// This duplicates some code in reals.cpp but is now updated.
#define DOUBLESIZE (sizeof(double)/sizeof(POLYUNSIGNED))
union realdb { double dble; POLYUNSIGNED puns[DOUBLESIZE]; };
#define LGWORDSIZE (sizeof(uintptr_t) / sizeof(PolyWord))
class X86TaskData: public TaskData, ByteCodeInterpreter {
public:
X86TaskData();
unsigned allocReg; // The register to take the allocated space.
POLYUNSIGNED allocWords; // The words to allocate.
AssemblyArgs assemblyInterface;
int saveRegisterMask; // Registers that need to be updated by a GC.
virtual void GarbageCollect(ScanAddress *process);
void ScanStackAddress(ScanAddress *process, stackItem &val, StackSpace *stack);
virtual void EnterPolyCode(); // Start running ML
// Interrupt this thread: implemented below by pulling the stack limit up
// so the next stack check traps.
virtual void InterruptCode();
virtual bool AddTimeProfileCount(SIGNALCONTEXT *context);
virtual void InitStackFrame(TaskData *parentTask, Handle proc);
virtual void SetException(poly_exn *exc);
// Atomically release a mutex using hardware interlock.
virtual bool AtomicallyReleaseMutex(PolyObject* mutexp);
// Return the minimum space occupied by the stack. Used when setting a limit.
// N.B. This is PolyWords not native words.
virtual uintptr_t currentStackSpace(void) const
{ return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
OVERFLOW_STACK_SIZE*sizeof(uintptr_t)/sizeof(PolyWord); }
// Increment the profile count for an allocation. Also now used for mutex contention.
virtual void addProfileCount(POLYUNSIGNED words)
{ addSynchronousCount(assemblyInterface.stackPtr[0].codeAddr, words); }
// PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer
virtual void PreRTSCall(void) { TaskData::PreRTSCall(); SaveMemRegisters(); }
// PostRTSCall: Before returning we need to restore the heap pointer.
// If there has been a GC in the RTS call we need to create a new heap area.
virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall(); }
virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length);
// Trap handling helpers; definitions appear later in this file.
void HeapOverflowTrap(byte *pcPtr);
void StackOverflowTrap(uintptr_t space);
void SetMemRegisters();
void SaveMemRegisters();
void SetRegisterMask();
void HandleTrap();
// ByteCode overrides. The interpreter and native code states need to be in sync.
// The interpreter is only used during the initial bootstrap.
virtual void ClearExceptionPacket() { assemblyInterface.exceptionPacket = TAGGED(0); }
virtual PolyWord GetExceptionPacket() { return assemblyInterface.exceptionPacket; }
virtual stackItem* GetHandlerRegister() { return assemblyInterface.handlerRegister; }
virtual void SetHandlerRegister(stackItem* hr) { assemblyInterface.handlerRegister = hr; }
// Check and grow the stack if necessary. Process any interrupts.
virtual void HandleStackOverflow(uintptr_t space) { StackOverflowTrap(space); }
void Interpret();
void EndBootStrap() { mixedCode = true; }
// Lock guarding the stack-limit field against the interrupting thread
// (see InterruptCode / StackOverflowTrap below).
PLock interruptLock;
stackItem *get_reg(int n);
// Accessors for the saved ML register values held in assemblyInterface.
// (The original text had "&reg" mis-encoded as the single character
// U+00AE; restored here. regDI() is used below in HandleTrap.)
stackItem *&regSP() { return assemblyInterface.stackPtr; }
stackItem &regAX() { return assemblyInterface.p_rax; }
stackItem &regBX() { return assemblyInterface.p_rbx; }
stackItem &regCX() { return assemblyInterface.p_rcx; }
stackItem &regDX() { return assemblyInterface.p_rdx; }
stackItem &regSI() { return assemblyInterface.p_rsi; }
stackItem &regDI() { return assemblyInterface.p_rdi; }
#ifdef HOSTARCHITECTURE_X86_64
stackItem &reg8() { return assemblyInterface.p_r8; }
stackItem &reg9() { return assemblyInterface.p_r9; }
stackItem &reg10() { return assemblyInterface.p_r10; }
stackItem &reg11() { return assemblyInterface.p_r11; }
stackItem &reg12() { return assemblyInterface.p_r12; }
stackItem &reg13() { return assemblyInterface.p_r13; }
stackItem &reg14() { return assemblyInterface.p_r14; }
#endif
};
class X86Dependent: public MachineDependent {
public:
X86Dependent(): mustInterpret(false) {}
// Create a task data object.
virtual TaskData *CreateTaskData(void) { return new X86TaskData(); }
// Initial size of stack in PolyWords
virtual unsigned InitialStackSize(void) { return (128+OVERFLOW_STACK_SIZE) * sizeof(uintptr_t) / sizeof(PolyWord); }
- virtual void ScanConstantsWithinCode(PolyObject *addr, PolyObject *oldAddr, POLYUNSIGNED length, ScanAddress *process);
+ virtual void ScanConstantsWithinCode(PolyObject *addr, PolyObject *oldAddr, POLYUNSIGNED length,
+ PolyWord* newConstAddr, PolyWord* oldConstAddr, POLYUNSIGNED numConsts, ScanAddress *process);
virtual void SetBootArchitecture(char arch, unsigned wordLength);
virtual Architectures MachineArchitecture(void);
// During the first bootstrap phase this is interpreted.
bool mustInterpret;
+
+ // Override for X86-64
+ // Find the start of the constant section for a piece of code.
+ virtual void GetConstSegmentForCode(PolyObject* obj, POLYUNSIGNED obj_length, PolyWord*& cp, POLYUNSIGNED& count) const
+ {
+ PolyWord* last_word = obj->Offset(obj_length - 1); // Last word in the code
+#ifdef HOSTARCHITECTURE_X86_64
+ // Only the low order 32-bits are valid since this may be
+ // set by a 32-bit relative relocation.
+ int32_t offset = (int32_t)last_word->AsSigned();
+#else
+ POLYSIGNED offset = last_word->AsSigned();
+#endif
+ cp = last_word + 1 + offset / sizeof(PolyWord);
+ count = cp[-1].AsUnsigned();
+ }
};
// The single statically-allocated machine-dependent object for this build,
// exported to the rest of the RTS through the machineDependent pointer.
static X86Dependent x86Dependent;
MachineDependent* machineDependent = &x86Dependent;
// Report the architecture of the code this build generates.
Architectures X86Dependent::MachineArchitecture(void)
{
    // While bootstrapping from an interpreted boot file we report the
    // interpreted architecture rather than the native one.
    if (mustInterpret)
        return MA_Interpreted;
#ifdef HOSTARCHITECTURE_X86_64
#ifdef POLYML32IN64
    return MA_X86_64_32;
#else
    return MA_X86_64;
#endif
#else
    return MA_I386;
#endif
}
// Record the architecture code letter from the boot file header.
// 'I' selects the byte-code interpreter for the first bootstrap phase,
// 'X' is native X86 code; anything else is a fatal error.
void X86Dependent::SetBootArchitecture(char arch, unsigned wordLength)
{
    switch (arch)
    {
    case 'I':
        mustInterpret = true;
        break;
    case 'X':
        break; // Native code: nothing to record.
    default:
        Crash("Boot file has unexpected architecture code: %c", arch);
    }
}
// Values for the returnReason byte
// Examined by HandleTrap below to decide why the ML code stopped.
enum RETURN_REASON {
RETURN_HEAP_OVERFLOW = 1,
RETURN_STACK_OVERFLOW = 2,
RETURN_STACK_OVERFLOWEX = 3,
RETURN_ENTER_INTERPRETER = 4
};
extern "C" {
// These are declared in the assembly code segment.
// X86AsmSwitchToPoly transfers control into ML; the X86AsmCallExtra*
// entry points are installed in assemblyInterface by the constructor.
void X86AsmSwitchToPoly(void *);
int X86AsmCallExtraRETURN_ENTER_INTERPRETER(void);
int X86AsmCallExtraRETURN_HEAP_OVERFLOW(void);
int X86AsmCallExtraRETURN_STACK_OVERFLOW(void);
int X86AsmCallExtraRETURN_STACK_OVERFLOWEX(void);
void X86TrapHandler(PolyWord threadId);
};
// Construct the per-thread state: wire the assembly-code trap entry points
// into the interface block shared with the generated code.
X86TaskData::X86TaskData(): ByteCodeInterpreter(&assemblyInterface.stackPtr, &assemblyInterface.stackLimit),
allocReg(0), allocWords(0), saveRegisterMask(0)
{
assemblyInterface.enterInterpreter = (byte*)X86AsmCallExtraRETURN_ENTER_INTERPRETER;
assemblyInterface.heapOverFlowCall = (byte*)X86AsmCallExtraRETURN_HEAP_OVERFLOW;
assemblyInterface.stackOverFlowCall = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOW;
assemblyInterface.stackOverFlowCallEx = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOWEX;
assemblyInterface.trapHandlerEntry = (byte*)X86TrapHandler;
interpreterPc = 0; // Non-zero only while running the byte-code interpreter.
// Mixed native/interpreted code unless we must interpret everything
// (initial bootstrap).
mixedCode = !x86Dependent.mustInterpret;
}
// GC scanning for this task: scan the parent state, the interpreter state,
// the live portion of the ML stack, and any registers flagged live in
// saveRegisterMask.
void X86TaskData::GarbageCollect(ScanAddress *process)
{
TaskData::GarbageCollect(process); // Process the parent first
ByteCodeInterpreter::GarbageCollect(process);
assemblyInterface.threadId = threadObject;
if (stack != 0)
{
ASSERT(assemblyInterface.stackPtr >= (stackItem*)stack->bottom && assemblyInterface.stackPtr <= (stackItem*)stack->top);
// Now the values on the stack.
// Only [stackPtr, top) is live; everything below stackPtr is garbage.
for (stackItem *q = assemblyInterface.stackPtr; q < (stackItem*)stack->top; q++)
ScanStackAddress(process, *q, stack);
}
// Register mask
// Only registers recorded by SetRegisterMask hold valid ML values here.
for (int i = 0; i < 16; i++)
{
if (saveRegisterMask & (1 << i))
ScanStackAddress(process, *get_reg(i), stack);
}
}
// Process a value within the stack.
// Distinguishes tagged integers, heap pointers, return addresses into code
// and stack addresses; only heap pointers are updated in place.
void X86TaskData::ScanStackAddress(ScanAddress *process, stackItem &stackItem, StackSpace *stack)
{
// We may have return addresses on the stack which could look like
// tagged values. Check whether the value is in the code area before
// checking whether it is untagged.
#ifdef POLYML32IN64
// In 32-in-64 return addresses always have the top 32 bits non-zero.
if (stackItem.argValue < ((uintptr_t)1 << 32))
{
// It's either a tagged integer or an object pointer.
if (stackItem.w().IsDataPtr())
{
PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
stackItem = val;
}
}
else
{
// Could be a code address or a stack address.
MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr - 1);
if (space == 0 || space->spaceType != ST_CODE) return;
PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
ASSERT(obj != 0);
// Process the address of the start. Don't update anything.
process->ScanObjectAddress(obj);
}
#else
// The -1 here is because we may have a zero-sized cell in the last
// word of a space.
MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr-1);
if (space == 0) return; // In particular we may have one of the assembly code addresses.
if (space->spaceType == ST_CODE)
{
PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
// If it is actually an integer it might be outside a valid code object.
if (obj == 0)
{
ASSERT(stackItem.w().IsTagged()); // It must be an integer
}
else // Process the address of the start. Don't update anything.
process->ScanObjectAddress(obj);
}
else if (space->spaceType == ST_LOCAL && stackItem.w().IsDataPtr())
// Local values must be word addresses.
{
PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
stackItem = val;
}
#endif
}
// Copy a stack
// Copies the live portion of the old stack into the new one, relocating any
// stack-internal pointers (in stack slots and in live registers) by the
// computed offset. Used when growing the stack.
void X86TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length)
{
/* Moves a stack, updating all references within the stack */
#ifdef POLYML32IN64
// Lengths are in PolyWords; a stackItem is two PolyWords in 32-in-64.
old_length = old_length / 2;
new_length = new_length / 2;
#endif
stackItem *old_base = (stackItem *)old_stack;
stackItem *new_base = (stackItem*)new_stack;
stackItem *old_top = old_base + old_length;
/* Calculate the offset of the new stack from the old. If the frame is
being extended objects in the new frame will be further up the stack
than in the old one. */
uintptr_t offset = new_base - old_base + new_length - old_length;
stackItem *oldStackPtr = assemblyInterface.stackPtr;
// Adjust the stack pointer and handler pointer since these point into the stack.
assemblyInterface.stackPtr = assemblyInterface.stackPtr + offset;
assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset;
// We need to adjust any values on the stack that are pointers within the stack.
// Skip the unused part of the stack.
size_t i = oldStackPtr - old_base;
ASSERT (i <= old_length);
i = old_length - i; // Number of live slots to copy.
stackItem *old = oldStackPtr;
stackItem *newp = assemblyInterface.stackPtr;
while (i--)
{
stackItem old_word = *old++;
if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
old_word.stackAddr = old_word.stackAddr + offset;
else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
{
stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
if (addr >= old_base && addr <= old_top)
{
addr += offset;
old_word = PolyWord::FromStackAddr((PolyWord*)addr);
}
}
*newp++ = old_word;
}
ASSERT(old == ((stackItem*)old_stack)+old_length);
ASSERT(newp == ((stackItem*)new_stack)+new_length);
// And change any registers that pointed into the old stack
// (same relocation rule as the stack slots above).
for (int j = 0; j < 16; j++)
{
if (saveRegisterMask & (1 << j))
{
stackItem *regAddr = get_reg(j);
stackItem old_word = *regAddr;
if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
old_word.stackAddr = old_word.stackAddr + offset;
else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
{
stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
if (addr >= old_base && addr <= old_top)
{
addr += offset;
old_word = PolyWord::FromStackAddr((PolyWord*)addr);
}
}
*regAddr = old_word;
}
}
}
// Transfer control to the ML code for the first time. Never returns.
void X86TaskData::EnterPolyCode()
/* Called from "main" to enter the code. */
{
if (x86Dependent.mustInterpret)
{
// Bootstrap: push the closure and start the byte-code interpreter at
// the closure's code address.
PolyWord closure = assemblyInterface.p_rdx;
*(--assemblyInterface.stackPtr) = closure; /* Closure address */
interpreterPc = *(POLYCODEPTR*)closure.AsObjPtr();
Interpret();
ASSERT(0); // Should never return
}
SetMemRegisters();
// Enter the ML code.
X86AsmSwitchToPoly(&this->assemblyInterface);
// This should never return
ASSERT(0);
}
// Run the byte-code interpreter, switching to native code when the callee
// or return target is not interpreted. interpreterPc is zeroed before
// returning so SaveMemRegisters knows we have left the interpreter.
void X86TaskData::Interpret()
{
while (true)
{
switch (RunInterpreter(this))
{
case ReturnCall:
// After the call there will be an enter-int instruction so that when this
// returns we will re-enter the interpreter. The number of arguments for
// this call is after that.
ASSERT(interpreterPc[0] == 0xff);
numTailArguments = interpreterPc[3];
// Deliberate fall-through: the rest is shared with tail calls.
case ReturnTailCall:
{
ClearExceptionPacket();
// Pop the closure.
PolyWord closureWord = *assemblyInterface.stackPtr++;
PolyObject* closure = closureWord.AsObjPtr();
interpreterPc = *(POLYCODEPTR*)closure;
// 0xff 0x55 0x48/0x24 is the enter-int byte sequence marking interpreted code.
if (interpreterPc[0] == 0xff && interpreterPc[1] == 0x55 && (interpreterPc[2] == 0x48 || interpreterPc[2] == 0x24))
{
// If the code we're going to is interpreted push back the closure and
// continue.
assemblyInterface.stackPtr--;
continue;
}
assemblyInterface.p_rdx = closureWord; // Put closure in the closure reg.
// Pop the return address.
POLYCODEPTR originalReturn = (assemblyInterface.stackPtr++)->codeAddr;
// Because of the way the build process works we only ever call functions with a single argument.
ASSERT(numTailArguments == 1);
assemblyInterface.p_rax = *(assemblyInterface.stackPtr++);
(*(--assemblyInterface.stackPtr)).codeAddr = originalReturn; // Push return address to caller
(*(--assemblyInterface.stackPtr)).codeAddr = *(POLYCODEPTR*)closure; // Entry point to callee
interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs)
return;
}
case ReturnReturn:
{
ClearExceptionPacket();
// If we're returning to interpreted code just continue interpreting.
if (interpreterPc[0] == 0xff && interpreterPc[1] == 0x55 && (interpreterPc[2] == 0x48 || interpreterPc[2] == 0x24))
continue;
// Get the return value from the stack and replace it by the
// address we're going to.
assemblyInterface.p_rax = assemblyInterface.stackPtr[0];
assemblyInterface.stackPtr[0].codeAddr = interpreterPc;
interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs)
return;
}
}
}
}
// Entered from the assembly code when the ML code traps, i.e. requests
// a GC or a stack extension; dispatches to the owning task's handler.
void X86TrapHandler(PolyWord threadId)
{
    ((X86TaskData*)TaskData::FindTaskForId(threadId))->HandleTrap();
}
// Dispatch on the reason recorded in assemblyInterface.returnReason:
// heap overflow, (extended) stack overflow, or enter-interpreter.
// Saves the memory registers on entry and restores them on exit.
void X86TaskData::HandleTrap()
{
SaveMemRegisters(); // Update globals from the memory registers.
switch (this->assemblyInterface.returnReason)
{
case RETURN_HEAP_OVERFLOW:
// The heap has overflowed.
SetRegisterMask();
this->HeapOverflowTrap(assemblyInterface.stackPtr[0].codeAddr); // Computes a value for allocWords only
break;
case RETURN_STACK_OVERFLOW:
case RETURN_STACK_OVERFLOWEX:
{
SetRegisterMask();
uintptr_t min_size; // Size in PolyWords
if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW)
{
min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
}
else
{
// Stack limit overflow. If the required stack space is larger than
// the fixed overflow size the code will calculate the limit in %EDI.
stackItem* stackP = regDI().stackAddr;
min_size = (this->stack->top - (PolyWord*)stackP) +
OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
}
StackOverflowTrap(min_size);
break;
}
case RETURN_ENTER_INTERPRETER:
{
// The return address on the stack points at a reason-code byte
// followed by interpreted code.
interpreterPc = assemblyInterface.stackPtr[0].codeAddr;
assemblyInterface.stackPtr++; // Pop return address.
byte reasonCode = *interpreterPc++;
// Sort out arguments.
assemblyInterface.exceptionPacket = TAGGED(0);
if (reasonCode == 0xff)
{
// Exception handler.
ASSERT(0); // Not used
assemblyInterface.exceptionPacket = assemblyInterface.p_rax; // Get the exception packet
// We're already in the exception handler but we still have to
// adjust the stack pointer and pop the current exception handler.
assemblyInterface.stackPtr = assemblyInterface.handlerRegister;
assemblyInterface.stackPtr++;
assemblyInterface.handlerRegister = (assemblyInterface.stackPtr++)[0].stackAddr;
}
else if (reasonCode >= 128)
{
// Start of function.
unsigned numArgs = reasonCode - 128;
// We need the stack to contain:
// The closure, the return address, the arguments.
// First pop the original return address.
POLYCODEPTR returnAddr = (assemblyInterface.stackPtr++)[0].codeAddr;
// Push the register args.
ASSERT(numArgs == 1); // We only ever call functions with one argument.
#ifdef HOSTARCHITECTURE_X86_64
ASSERT(numArgs <= 5);
if (numArgs >= 1) *(--assemblyInterface.stackPtr) = assemblyInterface.p_rax;
#ifdef POLYML32IN64
if (numArgs >= 2) *(--assemblyInterface.stackPtr) = assemblyInterface.p_rsi;
#else
if (numArgs >= 2) *(--assemblyInterface.stackPtr) = assemblyInterface.p_rbx;
#endif
if (numArgs >= 3) *(--assemblyInterface.stackPtr) = assemblyInterface.p_r8;
if (numArgs >= 4) *(--assemblyInterface.stackPtr) = assemblyInterface.p_r9;
if (numArgs >= 5) *(--assemblyInterface.stackPtr) = assemblyInterface.p_r10;
#else
ASSERT(numArgs <= 2);
if (numArgs >= 1) *(--assemblyInterface.stackPtr) = assemblyInterface.p_rax;
if (numArgs >= 2) *(--assemblyInterface.stackPtr) = assemblyInterface.p_rbx;
#endif
(--assemblyInterface.stackPtr)[0].codeAddr = returnAddr;
*(--assemblyInterface.stackPtr) = assemblyInterface.p_rdx; // Closure
}
else
{
// Return from call. Push RAX
*(--assemblyInterface.stackPtr) = assemblyInterface.p_rax;
}
Interpret();
break;
}
default:
Crash("Unknown return reason code %u", this->assemblyInterface.returnReason);
}
SetMemRegisters();
}
// Handle a stack-overflow or stack-limit trap: grow the stack if necessary,
// reset the stack limit, then process any pending asynchronous requests.
void X86TaskData::StackOverflowTrap(uintptr_t space)
{
uintptr_t min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE + space;
try {
// The stack check has failed. This may either be because we really have
// overflowed the stack or because the stack limit value has been adjusted
// to result in a call here.
CheckAndGrowStack(this, min_size);
}
catch (IOException&) {
// We may get an exception while handling this if we run out of store
}
{
PLocker l(&interruptLock);
// Set the stack limit. This clears any interrupt and also sets the
// correct value if we've grown the stack.
assemblyInterface.stackLimit = (stackItem*)this->stack->bottom + OVERFLOW_STACK_SIZE;
}
try {
processes->ProcessAsynchRequests(this);
// Release and re-acquire use of the ML memory to allow another thread
// to GC.
processes->ThreadReleaseMLMemory(this);
processes->ThreadUseMLMemory(this);
}
catch (IOException&) {
// Best-effort: failures here are ignored deliberately.
}
}
// Set up the initial ML stack, FP state and argument/closure registers
// for a new thread before it first enters ML code.
void X86TaskData::InitStackFrame(TaskData *parentTaskData, Handle proc)
/* Initialise stack frame. */
{
StackSpace *space = this->stack;
StackObject * newStack = space->stack();
uintptr_t stack_size = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem);
// Set the top of the stack inside the stack rather than at the end. This wastes
// a word but if sp is actually at the end OpenBSD segfaults because it isn't in
// a MAP_STACK area.
uintptr_t topStack = stack_size - 1;
stackItem* stackTop = (stackItem*)newStack + topStack;
*stackTop = TAGGED(0); // Set it to non-zero.
assemblyInterface.stackPtr = stackTop;
assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE;
assemblyInterface.handlerRegister = stackTop;
// Floating point save area.
memset(&assemblyInterface.p_fp, 0, sizeof(struct fpSaveArea));
#ifndef HOSTARCHITECTURE_X86_64
// Set the control word for 64-bit precision otherwise we get inconsistent results.
assemblyInterface.p_fp.cw = 0x027f ; // Control word
assemblyInterface.p_fp.tw = 0xffff; // Tag registers - all unused
#endif
// Store the argument and the closure.
assemblyInterface.p_rdx = proc->Word(); // Closure
assemblyInterface.p_rax = TAGGED(0); // Argument
// Have to set the register mask in case we get a GC before the thread starts.
saveRegisterMask = (1 << 2) | 1; // Rdx and rax
#ifdef POLYML32IN64
// In 32-in-64 RBX always contains the heap base address.
assemblyInterface.p_rbx.stackAddr = (stackItem*)globalHeapBase;
#endif
}
// In Solaris-x86 the registers are named EIP and ESP.
#if (!defined(REG_EIP) && defined(EIP))
#define REG_EIP EIP
#endif
#if (!defined(REG_ESP) && defined(ESP))
#define REG_ESP ESP
#endif
// Get the PC and SP(stack) from a signal context. This is needed for profiling.
// This version gets the actual sp and pc if we are in ML.
// N.B. This must not call malloc since we're in a signal handler.
// Returns true if a profile count was attributed to some code address.
bool X86TaskData::AddTimeProfileCount(SIGNALCONTEXT *context)
{
stackItem * sp = 0;
POLYCODEPTR pc = 0;
if (context != 0)
{
// The tests for HAVE_UCONTEXT_T, HAVE_STRUCT_SIGCONTEXT and HAVE_WINDOWS_H need
// to follow the tests in processes.h.
#if defined(HAVE_WINDOWS_H)
#ifdef _WIN64
sp = (stackItem *)context->Rsp;
pc = (POLYCODEPTR)context->Rip;
#else
// Windows 32 including cygwin.
sp = (stackItem *)context->Esp;
pc = (POLYCODEPTR)context->Eip;
#endif
#elif defined(HAVE_UCONTEXT_T)
#ifdef HAVE_MCONTEXT_T_GREGS
// Linux
#ifndef HOSTARCHITECTURE_X86_64
pc = (byte*)context->uc_mcontext.gregs[REG_EIP];
sp = (stackItem*)context->uc_mcontext.gregs[REG_ESP];
#else /* HOSTARCHITECTURE_X86_64 */
pc = (byte*)context->uc_mcontext.gregs[REG_RIP];
sp = (stackItem*)context->uc_mcontext.gregs[REG_RSP];
#endif /* HOSTARCHITECTURE_X86_64 */
#elif defined(HAVE_MCONTEXT_T_MC_ESP)
// FreeBSD
#ifndef HOSTARCHITECTURE_X86_64
pc = (byte*)context->uc_mcontext.mc_eip;
sp = (stackItem*)context->uc_mcontext.mc_esp;
#else /* HOSTARCHITECTURE_X86_64 */
pc = (byte*)context->uc_mcontext.mc_rip;
sp = (stackItem*)context->uc_mcontext.mc_rsp;
#endif /* HOSTARCHITECTURE_X86_64 */
#else
// Mac OS X
#ifndef HOSTARCHITECTURE_X86_64
#if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT32_SS))
pc = (byte*)context->uc_mcontext->ss.eip;
sp = (stackItem*)context->uc_mcontext->ss.esp;
#elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT32___SS))
pc = (byte*)context->uc_mcontext->__ss.__eip;
sp = (stackItem*)context->uc_mcontext->__ss.__esp;
#endif
#else /* HOSTARCHITECTURE_X86_64 */
#if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT64_SS))
pc = (byte*)context->uc_mcontext->ss.rip;
sp = (stackItem*)context->uc_mcontext->ss.rsp;
#elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT64___SS))
pc = (byte*)context->uc_mcontext->__ss.__rip;
sp = (stackItem*)context->uc_mcontext->__ss.__rsp;
#endif
#endif /* HOSTARCHITECTURE_X86_64 */
#endif
#elif defined(HAVE_STRUCT_SIGCONTEXT)
#if defined(HOSTARCHITECTURE_X86_64) && defined(__OpenBSD__)
// CPP defines missing in amd64/signal.h in OpenBSD
pc = (byte*)context->sc_rip;
sp = (stackItem*)context->sc_rsp;
#else // !HOSTARCHITECTURE_X86_64 || !defined(__OpenBSD__)
pc = (byte*)context->sc_pc;
sp = (stackItem*)context->sc_sp;
#endif
#endif
}
if (pc != 0)
{
// See if the PC we've got is an ML code address.
MemSpace *space = gMem.SpaceForAddress(pc);
if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
{
incrementCountAsynch(pc);
return true;
}
}
// See if the sp value is in the current stack.
if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
{
// We may be in the assembly code. The top of the stack will be a return address.
pc = sp[0].w().AsCodePtr();
MemSpace *space = gMem.SpaceForAddress(pc);
if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
{
incrementCountAsynch(pc);
return true;
}
}
// See if the value of regSP is a valid stack pointer.
// This works if we happen to be in an RTS call using a "Full" call.
// It doesn't work if we've used a "Fast" call because that doesn't save the SP.
sp = assemblyInterface.stackPtr;
if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
{
// We may be in the run-time system.
pc = sp[0].w().AsCodePtr();
MemSpace *space = gMem.SpaceForAddress(pc);
if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
{
incrementCountAsynch(pc);
return true;
}
}
// None of those worked
return false;
}
// Called from a different thread to interrupt this one, so take care.
void X86TaskData::InterruptCode()
{
    // The lock ensures we always use the current stack: the thread being
    // interrupted could be growing its stack at this very moment.
    PLocker lock(&interruptLock);
    if (this->stack == 0)
        return;
    // Raising the limit to the top of the stack guarantees a trap at the
    // next stack-overflow check in the ML code.
    this->assemblyInterface.stackLimit = (stackItem*)(this->stack->top - 1);
}
// This is called from SwitchToPoly before we enter the ML code.
// Ensures the allocation area can satisfy any pending allocation
// (allocWords), performs that allocation, and copies the heap limits and
// thread id into the interface block read by the assembly code.
void X86TaskData::SetMemRegisters()
{
// Copy the current store limits into variables before we go into the assembly code.
// If we haven't yet set the allocation area or we don't have enough we need
// to create one (or a new one).
if (this->allocPointer <= this->allocLimit + this->allocWords)
{
if (this->allocPointer < this->allocLimit)
Crash ("Bad length in heap overflow trap");
// Find some space to allocate in. Updates taskData->allocPointer and
// returns a pointer to the newly allocated space (if allocWords != 0)
PolyWord *space =
processes->FindAllocationSpace(this, this->allocWords, true);
if (space == 0)
{
// We will now raise an exception instead of returning.
// Set allocWords to zero so we don't set the allocation register
// since that could be holding the exception packet.
this->allocWords = 0;
}
// Undo the allocation just now.
this->allocPointer += this->allocWords;
}
if (this->allocWords != 0)
{
// If we have had a heap trap we actually do the allocation here.
// We will have already garbage collected and recovered sufficient space.
// This also happens if we have just trapped because of store profiling.
this->allocPointer -= this->allocWords; // Now allocate
// Set the allocation register to this area. N.B. This is an absolute address.
if (this->allocReg < 15)
get_reg(this->allocReg)[0].codeAddr = (POLYCODEPTR)(this->allocPointer + 1); /* remember: it's off-by-one */
this->allocWords = 0;
}
// If we have run out of store, either just above or while allocating in the RTS,
// allocPointer and allocLimit will have been set to zero as part of the GC. We will
// now be raising an exception which may free some store but we need to come back here
// before we allocate anything. The compiled code uses unsigned arithmetic to check for
// heap overflow but only after subtracting the space required. We need to make sure
// that the values are still non-negative after substracting any object size.
if (this->allocPointer == 0) this->allocPointer += MAX_OBJECT_SIZE;
if (this->allocLimit == 0) this->allocLimit += MAX_OBJECT_SIZE;
// The interface fields are kept one word beyond the real values.
this->assemblyInterface.localMbottom = this->allocLimit + 1;
this->assemblyInterface.localMpointer = this->allocPointer + 1;
// If we are profiling store allocation we set mem_hl so that a trap
// will be generated.
if (profileMode == kProfileStoreAllocation)
this->assemblyInterface.localMbottom = this->assemblyInterface.localMpointer;
this->assemblyInterface.threadId = this->threadObject;
}
// This is called whenever we have returned from ML to C.
void X86TaskData::SaveMemRegisters()
{
if (interpreterPc == 0) // Not if we're already in the interpreter
this->allocPointer = this->assemblyInterface.localMpointer - 1;
this->allocWords = 0;
this->assemblyInterface.exceptionPacket = TAGGED(0);
this->saveRegisterMask = 0;
}
// Called on a GC or stack overflow trap. The live-register mask is encoded
// in the bytes immediately after the trap call; decode it into
// saveRegisterMask and step the saved return address past the encoding.
void X86TaskData::SetRegisterMask()
{
    byte *pc = assemblyInterface.stackPtr[0].codeAddr;
    switch (*pc)
    {
    case 0xcd: // CD - INT n encodes a one-byte mask.
        saveRegisterMask = pc[1];
        pc += 2;
        break;
    case 0xca: // CA - FAR RETURN encodes a two-byte little-endian mask.
        saveRegisterMask = pc[1] | (pc[2] << 8);
        pc += 3;
        break;
    }
    assemblyInterface.stackPtr[0].codeAddr = pc;
}
// Map an x86 register number to the saved copy of that register.
// rsp(4)/rbp(5) never hold ML values and r15 is the heap pointer,
// so those numbers must not occur here; Crash on anything unexpected.
stackItem *X86TaskData::get_reg(int n)
/* Returns a pointer to the register given by n. */
{
switch (n)
{
case 0: return &assemblyInterface.p_rax;
case 1: return &assemblyInterface.p_rcx;
case 2: return &assemblyInterface.p_rdx;
case 3: return &assemblyInterface.p_rbx;
// Should not have rsp or rbp.
case 6: return &assemblyInterface.p_rsi;
case 7: return &assemblyInterface.p_rdi;
#ifdef HOSTARCHITECTURE_X86_64
case 8: return &assemblyInterface.p_r8;
case 9: return &assemblyInterface.p_r9;
case 10: return &assemblyInterface.p_r10;
case 11: return &assemblyInterface.p_r11;
case 12: return &assemblyInterface.p_r12;
case 13: return &assemblyInterface.p_r13;
case 14: return &assemblyInterface.p_r14;
// R15 is the heap pointer so shouldn't occur here.
#endif /* HOSTARCHITECTURE_X86_64 */
default: Crash("Unknown register %d\n", n);
}
}
// Called as a result of a heap overflow trap
// Decodes the instruction at pcPtr to find the register that holds the
// adjusted heap pointer, computes the requested size into allocWords
// (the allocation itself happens later in SetMemRegisters) and clears
// the register so it is not scanned as an address.
void X86TaskData::HeapOverflowTrap(byte *pcPtr)
{
X86TaskData *mdTask = this;
POLYUNSIGNED wordsNeeded = 0;
// The next instruction, after any branches round forwarding pointers or pop
// instructions, will be a store of register containing the adjusted heap pointer.
// We need to find that register and the value in it in order to find out how big
// the area we actually wanted is. N.B. The code-generator and assembly code
// must generate the correct instruction sequence.
// byte *pcPtr = assemblyInterface.programCtr;
while (true)
{
if (pcPtr[0] == 0xeb)
{
// Forwarding pointer
// 0xeb is a short relative jump; follow it (signed 8-bit offset).
if (pcPtr[1] >= 128) pcPtr += 256 - pcPtr[1] + 2;
else pcPtr += pcPtr[1] + 2;
}
else if ((pcPtr[0] & 0xf8) == 0x58) // Pop instruction.
pcPtr++;
else if (pcPtr[0] == 0x41 && ((pcPtr[1] & 0xf8) == 0x58)) // Pop with Rex prefix
pcPtr += 2;
else break;
}
#ifndef HOSTARCHITECTURE_X86_64
// This should be movl REG,0[%ebp].
ASSERT(pcPtr[0] == 0x89);
mdTask->allocReg = (pcPtr[1] >> 3) & 7; // Remember this until we allocate the memory
stackItem *reg = get_reg(mdTask->allocReg);
stackItem reg_val = *reg;
// The space we need is the difference between this register
// and the current value of newptr.
// The +1 here is because assemblyInterface.localMpointer is A.M.pointer +1. The reason
// is that after the allocation we have the register pointing at the address we will
// actually use.
wordsNeeded = (this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1;
*reg = TAGGED(0); // Clear this - it's not a valid address.
/* length in words, including length word */
ASSERT (wordsNeeded <= (1<<24)); /* Max object size including length/flag word is 2^24 words. */
#else /* HOSTARCHITECTURE_X86_64 */
ASSERT(pcPtr[1] == 0x89 || pcPtr[1] == 0x8b);
if (pcPtr[1] == 0x89)
{
// New (5.4) format. This should be movq REG,%r15
ASSERT(pcPtr[0] == 0x49 || pcPtr[0] == 0x4d);
mdTask->allocReg = (pcPtr[2] >> 3) & 7; // Remember this until we allocate the memory
if (pcPtr[0] & 0x4) mdTask->allocReg += 8; // REX.R extends the register number.
}
else
{
// Alternative form of movq REG,%r15
ASSERT(pcPtr[0] == 0x4c || pcPtr[0] == 0x4d);
mdTask->allocReg = pcPtr[2] & 7; // Remember this until we allocate the memory
if (pcPtr[0] & 0x1) mdTask->allocReg += 8; // REX.B extends the register number.
}
stackItem *reg = get_reg(this->allocReg);
stackItem reg_val = *reg;
wordsNeeded = (POLYUNSIGNED)((this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1);
*reg = TAGGED(0); // Clear this - it's not a valid address.
#endif /* HOSTARCHITECTURE_X86_64 */
if (profileMode == kProfileStoreAllocation)
addProfileCount(wordsNeeded);
mdTask->allocWords = wordsNeeded; // The actual allocation is done in SetMemRegisters.
}
void X86TaskData::SetException(poly_exn *exc)
// The RTS wants to raise an exception packet. Normally this is as the
// result of an RTS call in which case the caller will check this. It can
// also happen in a trap.
// (The packet is reset to TAGGED(0) in SaveMemRegisters/ClearExceptionPacket.)
{
assemblyInterface.exceptionPacket = (PolyWord)exc; // Set for direct calls.
}
// Decode and process an effective address. There may
// be a constant address in here but in any case we need
// to decode it to work out where the next instruction starts.
// If this is an lea instruction any addresses are just constants
// so must not be treated as addresses.
-static void skipea(PolyObject *base, byte **pt, ScanAddress *process, bool lea)
+static void skipea(PolyObject *base, byte *&pt, ScanAddress *process, bool lea, PolyWord* oldConstAddr,
+ POLYUNSIGNED numCodeWords, POLYSIGNED constAdjustment)
{
- unsigned int modrm = *((*pt)++);
+ unsigned int modrm = *(pt++);
unsigned int md = modrm >> 6;
unsigned int rm = modrm & 7;
if (md == 3) { } /* Register. */
else if (rm == 4)
{
/* s-i-b present. */
- unsigned int sib = *((*pt)++);
+ unsigned int sib = *(pt++);
if (md == 0)
{
if ((sib & 7) == 5)
{
- if (! lea) {
-#ifndef HOSTARCHITECTURE_X86_64
- process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
+ // Absolute address on X86, PC-relative on X64
+ if (! lea)
+ {
+#ifdef HOSTARCHITECTURE_X86_64
+ if (constAdjustment != 0)
+ {
+ POLYSIGNED disp = (pt[3] & 0x80) ? -1 : 0; // Set the sign just in case.
+ for (unsigned i = 4; i > 0; i--)
+ disp = (disp << 8) | pt[i - 1];
+ if (pt + disp > (byte*)base + numCodeWords * sizeof(PolyWord))
+ {
+ disp += constAdjustment;
+ byte* wr = gMem.SpaceForAddress(pt)->writeAble(pt);
+ for (unsigned i = 0; i < 4; i++)
+ {
+ wr[i] = (byte)(disp & 0xff);
+ disp >>= 8;
+ }
+ ASSERT(disp == 0 || disp == -1);
+ }
+ }
+ process->RelocateOnly(base, pt, PROCESS_RELOC_I386RELATIVE);
+#else
+ process->ScanConstant(base, pt, PROCESS_RELOC_DIRECT);
#endif /* HOSTARCHITECTURE_X86_64 */
}
- (*pt) += 4;
+ pt += 4;
}
}
- else if (md == 1) (*pt)++;
- else if (md == 2) (*pt) += 4;
+ else if (md == 1) pt++;
+ else if (md == 2) pt += 4;
}
else if (md == 0 && rm == 5)
{
- if (!lea) {
-#ifndef HOSTARCHITECTURE_X86_64
- /* Absolute address. */
- process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
+ // Absolute address on X86, PC-relative on X64
+ if (!lea)
+ {
+#ifdef HOSTARCHITECTURE_X86_64
+ if (constAdjustment != 0)
+ {
+ POLYSIGNED disp = (pt[3] & 0x80) ? -1 : 0; // Set the sign just in case.
+ for (unsigned i = 4; i > 0; i--)
+ disp = (disp << 8) | pt[i - 1];
+ if (pt + disp > (byte*)base + numCodeWords * sizeof(PolyWord))
+ {
+ disp += constAdjustment;
+ byte* wr = gMem.SpaceForAddress(pt)->writeAble(pt);
+ for (unsigned i = 0; i < 4; i++)
+ {
+ wr[i] = (byte)(disp & 0xff);
+ disp >>= 8;
+ }
+ ASSERT(disp == 0 || disp == -1);
+ }
+ }
+ process->RelocateOnly(base, pt, PROCESS_RELOC_I386RELATIVE);
+#else
+ process->ScanConstant(base, pt, PROCESS_RELOC_DIRECT);
#endif /* HOSTARCHITECTURE_X86_64 */
}
- *pt += 4;
+ pt += 4;
}
else
{
- if (md == 1) *pt += 1;
- else if (md == 2) *pt += 4;
+ if (md == 1) pt += 1;
+ else if (md == 2) pt += 4;
}
}
/* Added to deal with constants within the
code rather than in the constant area. The constant
area is still needed for the function name.
DCJM 2/1/2001
*/
-void X86Dependent::ScanConstantsWithinCode(PolyObject *addr, PolyObject *old, POLYUNSIGNED length, ScanAddress *process)
+void X86Dependent::ScanConstantsWithinCode(PolyObject *addr, PolyObject *old, POLYUNSIGNED length, PolyWord* newConstAddr, PolyWord* oldConstAddr,
+ POLYUNSIGNED numConsts, ScanAddress *process)
{
byte *pt = (byte*)addr;
PolyWord *end = addr->Offset(length - 1);
+ // If we have constants and code in separate areas then we will have to
+ // adjust the offsets of constants in the constant area.
+ // There are also offsets to non-address constants and these must
+ // not be altered.
+ POLYUNSIGNED numCodeWords = length - 1;
+ if (oldConstAddr > (PolyWord*)old && oldConstAddr < ((PolyWord*)old) + length)
+ numCodeWords -= numConsts;
+ POLYSIGNED constAdjustment =
+ (byte*)newConstAddr - (byte*)addr - ((byte*)oldConstAddr - (byte*)old);
+#ifdef HOSTARCHITECTURE_X86_64
+ // Put in a relocation for the offset itself if necessary.
+ process->RelocateOnly(addr, (byte*)end, PROCESS_RELOC_I386RELATIVE);
+ // There's a problem if the code and constant areas are allocated too
+ // far apart that the offsets exceeed 32-bits. For testing just
+ // include this assertion.
+ ASSERT(constAdjustment >= -(POLYSIGNED)0x80000000 && constAdjustment <= 0x7fffffff);
+#endif
// If this begins with enter-int it's interpreted code - ignore
if (pt[0] == 0xff && pt[1] == 0x55 && (pt[2] == 0x48 || pt[2] == 0x24)) return;
while (true)
{
// Escape prefixes come before any Rex byte
if (*pt == 0xf2 || *pt == 0xf3 || *pt == 0x66)
pt++;
#ifdef HOSTARCHITECTURE_X86_64
// REX prefixes. Set this first.
byte lastRex;
if (*pt >= 0x40 && *pt <= 0x4f)
lastRex = *pt++;
else
lastRex = 0;
//printf("pt=%p *pt=%x\n", pt, *pt);
#endif /* HOSTARCHITECTURE_X86_64 */
switch (*pt)
{
case 0x00: return; // This is actually the first byte of the old "marker" word.
case 0xf4: return; // Halt - now used as a marker.
case 0x50: case 0x51: case 0x52: case 0x53:
case 0x54: case 0x55: case 0x56: case 0x57: /* Push */
case 0x58: case 0x59: case 0x5a: case 0x5b:
case 0x5c: case 0x5d: case 0x5e: case 0x5f: /* Pop */
case 0x90: /* nop */ case 0xc3: /* ret */
case 0xf9: /* stc */ case 0xce: /* into */
case 0xf0: /* lock. */ case 0xf3: /* rep/repe */
case 0xa4: case 0xa5: case 0xaa: case 0xab: /* movs/stos */
case 0xa6: /* cmpsb */ case 0x9e: /* sahf */ case 0x99: /* cqo/cdq */
pt++; break;
case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
case 0x78: case 0x79: case 0x7a: case 0x7b: case 0x7c: case 0x7d: case 0x7e: case 0x7f:
case 0xeb:
/* short jumps. */
case 0xcd: /* INT - now used for a register mask */
case 0xa8: /* TEST_ACC8 */
case 0x6a: /* PUSH_8 */
pt += 2; break;
case 0xc2: /* RET_16 */
case 0xca: /* FAR RET 16 - used for a register mask */
pt += 3; break;
case 0x8d: /* leal. */
- pt++; skipea(addr, &pt, process, true); break;
+ pt++; skipea(addr, pt, process, true, oldConstAddr, numCodeWords, constAdjustment); break;
case 0x03: case 0x0b: case 0x13: case 0x1b:
case 0x23: case 0x2b: case 0x33: case 0x3b: /* Add r,ea etc. */
case 0x88: /* MOVB_R_A */ case 0x89: /* MOVL_R_A */
case 0x8b: /* MOVL_A_R */
case 0x62: /* BOUNDL */
case 0xff: /* Group5 */
case 0xd1: /* Group2_1_A */
case 0x8f: /* POP_A */
case 0xd3: /* Group2_CL_A */
case 0x87: // XCHNG
case 0x63: // MOVSXD
- pt++; skipea(addr, &pt, process, false); break;
+ pt++; skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment); break;
case 0xf6: /* Group3_a */
{
int isTest = 0;
pt++;
/* The test instruction has an immediate operand. */
if ((*pt & 0x38) == 0) isTest = 1;
- skipea(addr, &pt, process, false);
+ skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment);
if (isTest) pt++;
break;
}
case 0xf7: /* Group3_A */
{
int isTest = 0;
pt++;
/* The test instruction has an immediate operand. */
if ((*pt & 0x38) == 0) isTest = 1;
- skipea(addr, &pt, process, false);
+ skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment);
if (isTest) pt += 4;
break;
}
case 0xc1: /* Group2_8_A */
case 0xc6: /* MOVB_8_A */
case 0x83: /* Group1_8_A */
case 0x80: /* Group1_8_a */
case 0x6b: // IMUL Ev,Ib
- pt++; skipea(addr, &pt, process, false); pt++; break;
+ pt++; skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment); pt++; break;
case 0x69: // IMUL Ev,Iv
- pt++; skipea(addr, &pt, process, false); pt += 4; break;
+ pt++; skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment); pt += 4; break;
case 0x81: /* Group1_32_A */
{
pt ++;
#ifndef HOSTARCHITECTURE_X86_64
unsigned opCode = *pt;
#endif
- skipea(addr, &pt, process, false);
+ skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment);
// Only check the 32 bit constant if this is a comparison.
// For other operations this may be untagged and shouldn't be an address.
#ifndef HOSTARCHITECTURE_X86_64
if ((opCode & 0x38) == 0x38)
process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
#endif
pt += 4;
break;
}
case 0xe8: case 0xe9:
// Long jump and call. These are used to call constant (known) functions
// and also long jumps within the function.
{
pt++;
POLYSIGNED disp = (pt[3] & 0x80) ? -1 : 0; // Set the sign just in case.
for(unsigned i = 4; i > 0; i--)
disp = (disp << 8) | pt[i-1];
byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant
// If the new address is within the current piece of code we don't do anything
if (absAddr >= (byte*)addr && absAddr < (byte*)end) {}
- else {
-#ifdef HOSTARCHITECTURE_X86_64
- ASSERT(sizeof(PolyWord) == 4); // Should only be used internally on x64
-#endif /* HOSTARCHITECTURE_X86_64 */
- if (addr != old)
- {
- // The old value of the displacement was relative to the old address before
- // we copied this code segment.
- // We have to correct it back to the original address.
- absAddr = absAddr - (byte*)addr + (byte*)old;
- // We have to correct the displacement for the new location and store
- // that away before we call ScanConstant.
- size_t newDisp = absAddr - pt - 4;
- byte* wr = gMem.SpaceForAddress(pt)->writeAble(pt);
- for (unsigned i = 0; i < 4; i++)
- {
- wr[i] = (byte)(newDisp & 0xff);
- newDisp >>= 8;
- }
- }
- process->ScanConstant(addr, pt, PROCESS_RELOC_I386RELATIVE);
- }
+ else process->ScanConstant(addr, pt, PROCESS_RELOC_I386RELATIVE, (byte*)old- (byte*)addr);
pt += 4;
break;
}
case 0xc7:/* MOVL_32_A */
{
pt++;
if ((*pt & 0xc0) == 0x40 /* Byte offset or sib present */ &&
((*pt & 7) != 4) /* But not sib present */ && pt[1] == 256-sizeof(PolyWord))
{
/* We may use a move instruction to set the length
word on a new segment. We mustn't try to treat this as a constant. */
pt += 6; /* Skip the modrm byte, the offset and the constant. */
}
else
{
- skipea(addr, &pt, process, false);
+ skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment);
#ifndef HOSTARCHITECTURE_X86_64
// This isn't used for addresses even in 32-in-64
process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
#endif /* HOSTARCHITECTURE_X86_64 */
pt += 4;
}
break;
}
case 0xb8: case 0xb9: case 0xba: case 0xbb:
case 0xbc: case 0xbd: case 0xbe: case 0xbf: /* MOVL_32_64_R */
pt ++;
#ifdef HOSTARCHITECTURE_X86_64
if ((lastRex & 8) == 0)
pt += 4; // 32-bit mode on 64-bits
else
#endif /* HOSTARCHITECTURE_X86_64 */
{
// This is used in native 32-bit for constants and in
// 32-in-64 for the special case of an absolute address.
process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
pt += sizeof(uintptr_t);
}
break;
case 0x68: /* PUSH_32 */
pt ++;
#if (!defined(HOSTARCHITECTURE_X86_64))
process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
#endif
pt += 4;
break;
case 0x0f: /* ESCAPE */
{
pt++;
switch (*pt)
{
case 0xb1: // cmpxchg
case 0xb6: /* movzl */
case 0xb7: // movzw
case 0xbe: // movsx
case 0xbf: // movsx
case 0xc1: /* xaddl */
case 0xae: // ldmxcsr/stmxcsr
case 0xaf: // imul
case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
// cmov
- pt++; skipea(addr, &pt, process, false); break;
+ pt++; skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment); break;
case 0x80: case 0x81: case 0x82: case 0x83:
case 0x84: case 0x85: case 0x86: case 0x87:
case 0x88: case 0x89: case 0x8a: case 0x8b:
case 0x8c: case 0x8d: case 0x8e: case 0x8f:
/* Conditional branches with 32-bit displacement. */
pt += 5; break;
case 0x90: case 0x91: case 0x92: case 0x93:
case 0x94: case 0x95: case 0x96: case 0x97:
case 0x98: case 0x99: case 0x9a: case 0x9b:
case 0x9c: case 0x9d: case 0x9e: case 0x9f:
/* SetCC. */
- pt++; skipea(addr, &pt, process, false); break;
+ pt++; skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment); break;
// These are SSE2 instructions
case 0x10: case 0x11: case 0x58: case 0x5c: case 0x59: case 0x5e:
case 0x2e: case 0x2a: case 0x54: case 0x57: case 0x5a: case 0x6e:
case 0x7e: case 0x2c: case 0x2d:
- pt++; skipea(addr, &pt, process, false); break;
+ pt++; skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment); break;
case 0x73: // PSRLDQ - EA,imm
- pt++; skipea(addr, &pt, process, false); pt++; break;
+ pt++; skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment); pt++; break;
default: Crash("Unknown opcode %d at %p\n", *pt, pt);
}
break;
}
case 0xd8: case 0xd9: case 0xda: case 0xdb:
case 0xdc: case 0xdd: case 0xde: case 0xdf: // Floating point escape instructions
{
pt++;
if ((*pt & 0xe0) == 0xe0) pt++;
- else skipea(addr, &pt, process, false);
+ else skipea(addr, pt, process, false, oldConstAddr, numCodeWords, constAdjustment);
break;
}
default: Crash("Unknown opcode %d at %p\n", *pt, pt);
}
}
}
#if defined(_MSC_VER)
// This saves having to define it in the MASM assembly code.
static uintptr_t X86AsmAtomicExchange(PolyObject* mutexp, uintptr_t value)
{
# if (SIZEOF_POLYWORD == 8)
return InterlockedExchange64((LONG64*)mutexp, value);
# else
return InterlockedExchange((LONG*)mutexp, value);
# endif
}
#else
extern "C" {
// This is only defined in the GAS assembly code
uintptr_t X86AsmAtomicExchange(PolyObject*, uintptr_t);
}
#endif
// Set the mutex to zero (released) and return true if no other thread is waiting.
bool X86TaskData::AtomicallyReleaseMutex(PolyObject* mutexp)
{
uintptr_t oldValue = X86AsmAtomicExchange(mutexp, 0);
return oldValue == 1;
}
extern "C" {
POLYEXTERNALSYMBOL void *PolyX86GetThreadData();
POLYEXTERNALSYMBOL POLYUNSIGNED PolyInterpretedEnterIntMode();
POLYEXTERNALSYMBOL POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function);
+ POLYEXTERNALSYMBOL POLYUNSIGNED PolyX86IsLocalCode(PolyObject* destination);
}
// Return the address of assembly data for the current thread. This is normally in
// RBP except if we are in a callback.
void *PolyX86GetThreadData()
{
// We should get the task data for the thread that is running this code.
// If this thread has been created by the foreign code we will have to
// create a new one here.
TaskData* taskData = processes->GetTaskDataForThread();
if (taskData == 0)
{
try {
taskData = processes->CreateNewTaskData();
}
catch (std::bad_alloc&) {
::Exit("Unable to create thread data - insufficient memory");
}
catch (MemoryException&) {
::Exit("Unable to create thread data - insufficient memory");
}
}
return &((X86TaskData*)taskData)->assemblyInterface;
}
// Do we require EnterInt instructions and if so for which architecture?
// 0 = > None; 1 => X86_32, 2 => X86_64. 3 => X86_32_in_64.
POLYUNSIGNED PolyInterpretedEnterIntMode()
{
#ifdef POLYML32IN64
return TAGGED(3).AsUnsigned();
#elif defined(HOSTARCHITECTURE_X86_64)
return TAGGED(2).AsUnsigned();
#else
return TAGGED(1).AsUnsigned();
#endif
}
// End bootstrap mode and run a new function.
POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function)
{
TaskData* taskData = TaskData::FindTaskForId(threadId);
ASSERT(taskData != 0);
taskData->PreRTSCall();
Handle pushedFunction = taskData->saveVec.push(function);
x86Dependent.mustInterpret = false;
((X86TaskData*)taskData)->EndBootStrap();
taskData->InitStackFrame(taskData, pushedFunction);
taskData->EnterPolyCode();
// Should never return.
ASSERT(0);
return TAGGED(0).AsUnsigned();
}
+// Test whether the target is within the local code area. This is only used on
+// native 64-bits. A call/jump to local code can use a 32-bit displacement
+// whereas a call/jump to a function in the executable will need to use an
+// indirect reference through the code area.
+POLYUNSIGNED PolyX86IsLocalCode(PolyObject* destination)
+{
+ MemSpace* space = gMem.SpaceForObjectAddress(destination);
+ if (space->spaceType == ST_CODE)
+ return TAGGED(1).AsUnsigned();
+ else return TAGGED(0).AsUnsigned();
+}
+
struct _entrypts machineSpecificEPT[] =
{
{ "PolyX86GetThreadData", (polyRTSFunction)&PolyX86GetThreadData },
{ "PolyInterpretedEnterIntMode", (polyRTSFunction)&PolyInterpretedEnterIntMode },
{ "PolyEndBootstrapMode", (polyRTSFunction)&PolyEndBootstrapMode },
+ { "PolyX86IsLocalCode", (polyRTSFunction)&PolyX86IsLocalCode },
{ NULL, NULL} // End of list.
};
diff --git a/mlsource/MLCompiler/CodeTree/X86Code/X86OUTPUTCODE.ML b/mlsource/MLCompiler/CodeTree/X86Code/X86OUTPUTCODE.ML
index 6469cf3c..46c78be0 100644
--- a/mlsource/MLCompiler/CodeTree/X86Code/X86OUTPUTCODE.ML
+++ b/mlsource/MLCompiler/CodeTree/X86Code/X86OUTPUTCODE.ML
@@ -1,4068 +1,4068 @@
(*
Copyright David C. J. Matthews 1989, 2000, 2009-10, 2012-13, 2015-21
Based on original code:
Copyright (c) 2000
Cambridge University Technical Services Limited
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*)
(*
Title: Code Generator Routines.
Author: Dave Matthews, Cambridge University Computer Laboratory
Copyright Cambridge University 1989
*)
(* This module contains the code vector and operations to insert code into
it. Each procedure is compiled into a separate segment. Initially it is
compiled into a fixed size segment, and then copied into a segment of the
correct size at the end.
This module contains all the definitions of the X86 opCodes and registers.
It uses "codeseg" to create and operate on the segment itself.
*)
functor X86OUTPUTCODE (
structure DEBUG: DEBUG
structure PRETTY: PRETTYSIG (* for compilerOutTag *)
structure CODE_ARRAY: CODEARRAYSIG
) : X86CODESIG =
struct
open CODE_ARRAY
open DEBUG
open Address
open Misc
(* May be targeted at native 32-bit, native 64-bit or X86/64 with 32-bit words
and addresses as object Ids. *)
datatype targetArch = Native32Bit | Native64Bit | ObjectId32Bit
val targetArch =
case PolyML.architecture() of
"I386" => Native32Bit
| "X86_64" => Native64Bit
| "X86_64_32" => ObjectId32Bit
| _ => raise InternalError "Unknown target architecture"
(* Some checks - *)
val () =
case (targetArch, wordSize, nativeWordSize) of
(Native32Bit, 0w4, 0w4) => ()
| (Native64Bit, 0w8, 0w8) => ()
| (ObjectId32Bit, 0w4, 0w8) => ()
| _ => raise InternalError "Mismatch of architecture and word-length"
val hostIsX64 = targetArch <> Native32Bit
infix 5 << <<+ <<- >> >>+ >>- ~>> ~>>+ ~>>- (* Shift operators *)
infix 3 andb orb xorb andbL orbL xorbL andb8 orb8 xorb8
val op << = Word.<< and op >> = Word.>>
val (*op <<+ = LargeWord.<< and *) op >>+ = LargeWord.>>
val op <<- = Word8.<< and op >>- = Word8.>>
val op orb8 = Word8.orb
val op andb8 = Word8.andb
val op andb = Word.andb (* and op andbL = LargeWord.andb *)
and op orb = Word.orb
val wordToWord8 = Word8.fromLargeWord o Word.toLargeWord
(*and word8ToWord = Word.fromLargeWord o Word8.toLargeWord*)
val exp2_16 = 0x10000
val exp2_31 = 0x80000000: LargeInt.int
(* Returns true if this a 32-bit machine or if the constant is within 32-bits.
This is exported to the higher levels. N.B. The test for not isX64
avoids a significant overhead with arbitrary precision arithmetic on
X86/32. *)
fun is32bit v = not hostIsX64 orelse ~exp2_31 <= v andalso v < exp2_31
(* tag a short constant *)
fun tag c = 2 * c + 1;
fun is8BitL (n: LargeInt.int) = ~ 0x80 <= n andalso n < 0x80
local
val shift =
if wordSize = 0w4
then 0w2
else if wordSize = 0w8
then 0w3
else raise InternalError "Invalid word size for x86_32 or x86+64"
in
fun wordsToBytes n = n << shift
and bytesToWords n = n >> shift
end
infix 6 addrPlus addrMinus;
(* All indexes into the code vector have type "addrs". This is really a legacy. *)
type addrs = Word.word
val addrZero = 0w0
(* This is the external label type used when constructing operations. *)
datatype label = Label of { labelNo: int }
(* Constants which are too large to go inline in the code are put in
a list and put at the end of the code. They are arranged so that
the garbage collector can find them and change them as necessary.
A reference to a constant is treated like a forward reference to a
label. *)
datatype code =
Code of
{
procName: string, (* Name of the procedure. *)
printAssemblyCode:bool, (* Whether to print the code when we finish. *)
printStream: string->unit, (* The stream to use *)
lowLevelOptimise: bool, (* Whether to do the low-level optimisation pass *)
profileObject : machineWord (* The profile object for this code. *)
}
(* Exported functions *)
fun lowLevelOptimise(Code{lowLevelOptimise, ...}) = lowLevelOptimise
(* EBP/RBP points to a structure that interfaces to the RTS. These are
offsets into that structure. *)
val memRegLocalMPointer = 0 (* Not used in 64-bit *)
and memRegHandlerRegister = Word.toInt nativeWordSize
and memRegLocalMbottom = 2 * Word.toInt nativeWordSize
and memRegStackLimit = 3 * Word.toInt nativeWordSize
and memRegExceptionPacket = 4 * Word.toInt nativeWordSize
and memRegCStackPtr = 6 * Word.toInt nativeWordSize
and memRegThreadSelf = 7 * Word.toInt nativeWordSize
and memRegStackPtr = 8 * Word.toInt nativeWordSize
and memRegHeapOverflowCall = 10 * Word.toInt nativeWordSize
and memRegStackOverflowCall = 11 * Word.toInt nativeWordSize
and memRegStackOverflowCallEx = 12 * Word.toInt nativeWordSize
and memRegSavedRbx = 15 * Word.toInt nativeWordSize (* Heap base in 32-in-64. *)
(* create and initialise a code segment *)
fun codeCreate (name : string, profObj, parameters) : code =
let
val printStream = PRETTY.getSimplePrinter(parameters, [])
in
Code
{
procName = name,
printAssemblyCode = DEBUG.getParameter DEBUG.assemblyCodeTag parameters,
printStream = printStream,
lowLevelOptimise = DEBUG.getParameter DEBUG.lowlevelOptimiseTag parameters,
profileObject = profObj
}
end
(* Put 1 unsigned byte at a given offset in the segment. *)
fun set8u (b, addr, seg) = byteVecSet (seg, addr, b)
(* Put 4 bytes at a given offset in the segment. *)
(* b0 is the least significant byte. *)
fun set4Bytes (b3, b2, b1, b0, addr, seg) =
let
val a = addr;
in
(* Little-endian *)
byteVecSet (seg, a, b0);
byteVecSet (seg, a + 0w1, b1);
byteVecSet (seg, a + 0w2, b2);
byteVecSet (seg, a + 0w3, b3)
end;
(* Put 1 unsigned word at a given offset in the segment. *)
fun set32u (ival: LargeWord.word, addr, seg) : unit =
let
val b3 = Word8.fromLargeWord (ival >>+ 0w24)
val b2 = Word8.fromLargeWord (ival >>+ 0w16)
val b1 = Word8.fromLargeWord (ival >>+ 0w8)
val b0 = Word8.fromLargeWord ival
in
set4Bytes (b3, b2, b1, b0, addr, seg)
end
(* Put 1 signed word at a given offset in the segment. *)
fun set32s (ival: LargeInt.int, addr, seg) = set32u(LargeWord.fromLargeInt ival, addr, seg)
fun byteSigned ival =
if ~0x80 <= ival andalso ival < 0x80
then Word8.fromInt ival
else raise InternalError "byteSigned: invalid byte"
(* Convert a large-word value to a little-endian byte sequence. *)
fun largeWordToBytes(_, 0) = []
| largeWordToBytes(ival: LargeWord.word, n) =
Word8.fromLargeWord ival :: largeWordToBytes(ival >>+ 0w8, n-1)
fun word32Unsigned(ival: LargeWord.word) = largeWordToBytes(ival, 4)
fun int32Signed(ival: LargeInt.int) =
if is32bit ival
then word32Unsigned(LargeWord.fromLargeInt ival)
else raise InternalError "int32Signed: invalid word"
(* Registers. *)
datatype genReg = GeneralReg of Word8.word * bool
and fpReg = FloatingPtReg of Word8.word
and xmmReg = SSE2Reg of Word8.word
datatype reg =
GenReg of genReg
| FPReg of fpReg
| XMMReg of xmmReg
(* These are the real registers we have. The AMD extension encodes the
additional registers through the REX prefix. *)
val rax = GeneralReg (0w0, false)
val rcx = GeneralReg (0w1, false)
val rdx = GeneralReg (0w2, false)
val rbx = GeneralReg (0w3, false)
val rsp = GeneralReg (0w4, false)
val rbp = GeneralReg (0w5, false)
val rsi = GeneralReg (0w6, false)
val rdi = GeneralReg (0w7, false)
val eax = rax and ecx = rcx and edx = rdx and ebx = rbx
and esp = rsp and ebp = rbp and esi = rsi and edi = rdi
val r8 = GeneralReg (0w0, true)
val r9 = GeneralReg (0w1, true)
val r10 = GeneralReg (0w2, true)
val r11 = GeneralReg (0w3, true)
val r12 = GeneralReg (0w4, true)
val r13 = GeneralReg (0w5, true)
val r14 = GeneralReg (0w6, true)
val r15 = GeneralReg (0w7, true)
(* Floating point "registers". Actually entries on the floating point stack.
The X86 has a floating point stack with eight entries. *)
val fp0 = FloatingPtReg 0w0
and fp1 = FloatingPtReg 0w1
and fp2 = FloatingPtReg 0w2
and fp3 = FloatingPtReg 0w3
and fp4 = FloatingPtReg 0w4
and fp5 = FloatingPtReg 0w5
and fp6 = FloatingPtReg 0w6
and fp7 = FloatingPtReg 0w7
(* SSE2 Registers. These are used for floating point in 64-bity mode.
We only use XMM0-6 because the others are callee save and we don't
currently save them. *)
val xmm0 = SSE2Reg 0w0
and xmm1 = SSE2Reg 0w1
and xmm2 = SSE2Reg 0w2
and xmm3 = SSE2Reg 0w3
and xmm4 = SSE2Reg 0w4
and xmm5 = SSE2Reg 0w5
and xmm6 = SSE2Reg 0w6
and xmm7 = SSE2Reg 0w7
fun getReg (GeneralReg r) = r
fun mkReg n = GeneralReg n (* reg.up *)
(* The maximum size of the register vectors and masks. Although the
X86/32 has a floating point stack with eight entries it's much simpler
to treat it as having seven "real" registers. Items are pushed to the
stack and then stored and popped into the current location. It may be
possible to improve the code by some peephole optimisation. *)
val regs = 30 (* Include the X86/64 registers even if this is 32-bit. *)
(* The nth register (counting from 0). *)
(* Profiling shows that applying the constructors here creates a lot of
garbage. Create the entries once and then use vector indexing instead. *)
local
fun regN i =
if i < 8
then GenReg(GeneralReg(Word8.fromInt i, false))
else if i < 16
then GenReg(GeneralReg(Word8.fromInt(i-8), true))
else if i < 23
then FPReg(FloatingPtReg(Word8.fromInt(i-16)))
else XMMReg(SSE2Reg(Word8.fromInt(i-23)))
val regVec = Vector.tabulate(regs, regN)
in
fun regN i = Vector.sub(regVec, i) handle Subscript => raise InternalError "Bad register number"
end
(* The number of the register. *)
fun nReg(GenReg(GeneralReg(r, false))) = Word8.toInt r
| nReg(GenReg(GeneralReg(r, true))) = Word8.toInt r + 8
| nReg(FPReg(FloatingPtReg r)) = Word8.toInt r + 16
| nReg(XMMReg(SSE2Reg r)) = Word8.toInt r + 23
datatype opsize = SZByte | SZWord | SZDWord | SZQWord
(* Default size when printing regs. *)
val sz32_64 = if hostIsX64 then SZQWord else SZDWord
fun genRegRepr(GeneralReg (0w0, false), SZByte) = "al"
| genRegRepr(GeneralReg (0w1, false), SZByte) = "cl"
| genRegRepr(GeneralReg (0w2, false), SZByte) = "dl"
| genRegRepr(GeneralReg (0w3, false), SZByte) = "bl"
| genRegRepr(GeneralReg (0w4, false), SZByte) = "ah"
| genRegRepr(GeneralReg (0w5, false), SZByte) = "ch"
| genRegRepr(GeneralReg (0w6, false), SZByte) = "sil" (* Assume there's a Rex code that forces low-order reg *)
| genRegRepr(GeneralReg (0w7, false), SZByte) = "dil"
| genRegRepr(GeneralReg (reg, true), SZByte) = "r" ^ Int.toString(Word8.toInt reg +8) ^ "b"
| genRegRepr(GeneralReg (0w0, false), SZDWord) = "eax"
| genRegRepr(GeneralReg (0w1, false), SZDWord) = "ecx"
| genRegRepr(GeneralReg (0w2, false), SZDWord) = "edx"
| genRegRepr(GeneralReg (0w3, false), SZDWord) = "ebx"
| genRegRepr(GeneralReg (0w4, false), SZDWord) = "esp"
| genRegRepr(GeneralReg (0w5, false), SZDWord) = "ebp"
| genRegRepr(GeneralReg (0w6, false), SZDWord) = "esi"
| genRegRepr(GeneralReg (0w7, false), SZDWord) = "edi"
| genRegRepr(GeneralReg (reg, true), SZDWord) = "r" ^ Int.toString(Word8.toInt reg +8) ^ "d"
| genRegRepr(GeneralReg (0w0, false), SZQWord) = "rax"
| genRegRepr(GeneralReg (0w1, false), SZQWord) = "rcx"
| genRegRepr(GeneralReg (0w2, false), SZQWord) = "rdx"
| genRegRepr(GeneralReg (0w3, false), SZQWord) = "rbx"
| genRegRepr(GeneralReg (0w4, false), SZQWord) = "rsp"
| genRegRepr(GeneralReg (0w5, false), SZQWord) = "rbp"
| genRegRepr(GeneralReg (0w6, false), SZQWord) = "rsi"
| genRegRepr(GeneralReg (0w7, false), SZQWord) = "rdi"
| genRegRepr(GeneralReg (reg, true), SZQWord) = "r" ^ Int.toString(Word8.toInt reg +8)
| genRegRepr(GeneralReg (0w0, false), SZWord) = "ax"
| genRegRepr(GeneralReg (0w1, false), SZWord) = "cx"
| genRegRepr(GeneralReg (0w2, false), SZWord) = "dx"
| genRegRepr(GeneralReg (0w3, false), SZWord) = "bx"
| genRegRepr(GeneralReg (0w4, false), SZWord) = "sp"
| genRegRepr(GeneralReg (0w5, false), SZWord) = "bp"
| genRegRepr(GeneralReg (0w6, false), SZWord) = "si"
| genRegRepr(GeneralReg (0w7, false), SZWord) = "di"
| genRegRepr(GeneralReg (reg, true), SZWord) = "r" ^ Int.toString(Word8.toInt reg +8) ^ "w"
| genRegRepr _ = "unknown" (* Suppress warning because word values are not exhaustive. *)
and fpRegRepr(FloatingPtReg n) = "fp" ^ Word8.toString n
and xmmRegRepr(SSE2Reg n) = "xmm" ^ Word8.toString n
fun regRepr(GenReg r) = genRegRepr (r, sz32_64)
| regRepr(FPReg r) = fpRegRepr r
| regRepr(XMMReg r) = xmmRegRepr r
(* Install a pretty printer. This is simply for when this code is being
run under the debugger. N.B. We need PolyML.PrettyString here. *)
val () = PolyML.addPrettyPrinter(fn _ => fn _ => fn r => PolyML.PrettyString(regRepr r))
datatype argType = ArgGeneral | ArgFP
(* Size of operand. OpSize64 is only valid in 64-bit mode. *)
datatype opSize = OpSize32 | OpSize64
structure RegSet =
struct
(* Implement a register set as a bit mask. *)
datatype regSet = RegSet of word
fun singleton r = RegSet(0w1 << Word.fromInt(nReg r))
fun regSetUnion(RegSet r1, RegSet r2) = RegSet(Word.orb(r1, r2))
fun regSetIntersect(RegSet r1, RegSet r2) = RegSet(Word.andb(r1, r2))
local
fun addReg(acc, n) =
if n = regs then acc else addReg(regSetUnion(acc, singleton(regN n)), n+1)
in
val allRegisters = addReg(RegSet 0w0, 0)
end
val noRegisters = RegSet 0w0
fun inSet(r, rs) = regSetIntersect(singleton r, rs) <> noRegisters
fun regSetMinus(RegSet s1, RegSet s2) = RegSet(Word.andb(s1, Word.notb s2))
val listToSet = List.foldl (fn(r, rs) => regSetUnion(singleton r, rs)) noRegisters
local
val regs =
case targetArch of
Native32Bit => [eax, ecx, edx, ebx, esi, edi]
| Native64Bit => [eax, ecx, edx, ebx, esi, edi, r8, r9, r10, r11, r12, r13, r14]
| ObjectId32Bit => [eax, ecx, edx, esi, edi, r8, r9, r10, r11, r12, r13, r14]
in
val generalRegisters = listToSet(map GenReg regs)
end
(* The floating point stack. Note that this excludes one item so it is always
possible to load a value onto the top of the FP stack. *)
val floatingPtRegisters =
listToSet(map FPReg [fp0, fp1, fp2, fp3, fp4, fp5, fp6(*, fp7*)])
val sse2Registers =
listToSet(map XMMReg [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6])
fun isAllRegs rs = rs = allRegisters
fun setToList (RegSet regSet)=
let
fun testBit (n, bit, res) =
if n = regs
then res
else testBit(n+1, bit << 0w1,
if (regSet andb bit) <> 0w0
then regN n :: res else res)
in
testBit(0, 0w1, [])
end
val cardinality = List.length o setToList
(* Choose one of the set. This chooses the least value which means that
the ordering of the registers is significant. This is a hot-spot
so is coded directly with the word operations. *)
fun oneOf(RegSet regSet) =
let
fun find(n, bit) =
if n = Word.fromInt regs then raise InternalError "oneOf: empty"
else if Word.andb(bit, regSet) <> 0w0 then n
else find(n+0w1, Word.<<(bit, 0w1))
in
regN(Word.toInt(find(0w0, 0w1)))
end
fun regSetRepr regSet =
let
val regs = setToList regSet
in
"[" ^ String.concatWith "," (List.map regRepr regs) ^ "]"
end
(* Install a pretty printer for when this code is being debugged. *)
val () = PolyML.addPrettyPrinter(fn _ => fn _ => fn r => PolyML.PrettyString(regSetRepr r))
end
open RegSet
datatype arithOp = ADD | OR (*|ADC | SBB*) | AND | SUB | XOR | CMP
fun arithOpToWord ADD = 0w0: Word8.word
| arithOpToWord OR = 0w1
| arithOpToWord AND = 0w4
| arithOpToWord SUB = 0w5
| arithOpToWord XOR = 0w6
| arithOpToWord CMP = 0w7
fun arithOpRepr ADD = "Add"
| arithOpRepr OR = "Or"
| arithOpRepr AND = "And"
| arithOpRepr SUB = "Sub"
| arithOpRepr XOR = "Xor"
| arithOpRepr CMP = "Cmp"
datatype shiftType = SHL | SHR | SAR
fun shiftTypeToWord SHL = 0w4: Word8.word
| shiftTypeToWord SHR = 0w5
| shiftTypeToWord SAR = 0w7
fun shiftTypeRepr SHL = "Shift Left Logical"
| shiftTypeRepr SHR = "Shift Right Logical"
| shiftTypeRepr SAR = "Shift Right Arithemetic"
datatype repOps = CMPS8 | MOVS8 | MOVS32 | STOS8 | STOS32 | MOVS64 | STOS64
fun repOpsToWord CMPS8 = 0wxa6: Word8.word
| repOpsToWord MOVS8 = 0wxa4
| repOpsToWord MOVS32 = 0wxa5
| repOpsToWord MOVS64 = 0wxa5 (* Plus Rex.w *)
| repOpsToWord STOS8 = 0wxaa
| repOpsToWord STOS32 = 0wxab
| repOpsToWord STOS64 = 0wxab (* Plus Rex.w *)
fun repOpsRepr CMPS8 = "CompareBytes"
| repOpsRepr MOVS8 = "MoveBytes"
| repOpsRepr MOVS32 = "MoveWords32"
| repOpsRepr MOVS64 = "MoveWords64"
| repOpsRepr STOS8 = "StoreBytes"
| repOpsRepr STOS32 = "StoreWords32"
| repOpsRepr STOS64 = "StoreWords64"
datatype fpOps = FADD | FMUL | FCOM | FCOMP | FSUB | FSUBR | FDIV | FDIVR
fun fpOpToWord FADD = 0w0: Word8.word
| fpOpToWord FMUL = 0w1
| fpOpToWord FCOM = 0w2
| fpOpToWord FCOMP = 0w3
| fpOpToWord FSUB = 0w4
| fpOpToWord FSUBR = 0w5
| fpOpToWord FDIV = 0w6
| fpOpToWord FDIVR = 0w7
fun fpOpRepr FADD = "FPAdd"
| fpOpRepr FMUL = "FPMultiply"
| fpOpRepr FCOM = "FPCompare"
| fpOpRepr FCOMP = "FPCompareAndPop"
| fpOpRepr FSUB = "FPSubtract"
| fpOpRepr FSUBR = "FPReverseSubtract"
| fpOpRepr FDIV = "FPDivide"
| fpOpRepr FDIVR = "FPReverseDivide"
datatype fpUnaryOps = FCHS | FABS | FLD1 | FLDZ
fun fpUnaryToWords FCHS = {rm=0w0:Word8.word, nnn=0w4: Word8.word}
| fpUnaryToWords FABS = {rm=0w1, nnn=0w4}
| fpUnaryToWords FLD1 = {rm=0w0, nnn=0w5}
| fpUnaryToWords FLDZ = {rm=0w6, nnn=0w5}
fun fpUnaryRepr FCHS = "FPChangeSign"
| fpUnaryRepr FABS = "FPAbs"
| fpUnaryRepr FLD1 = "FPLoadOne"
| fpUnaryRepr FLDZ = "FPLoadZero"
datatype branchOps = JO | JNO | JE | JNE | JL | JGE | JLE | JG | JB | JNB | JNA | JA | JP | JNP
fun branchOpToWord JO = 0wx0: Word8.word
| branchOpToWord JNO = 0wx1
| branchOpToWord JB = 0wx2
| branchOpToWord JNB = 0wx3
| branchOpToWord JE = 0wx4
| branchOpToWord JNE = 0wx5
| branchOpToWord JNA = 0wx6
| branchOpToWord JA = 0wx7
| branchOpToWord JP = 0wxa
| branchOpToWord JNP = 0wxb
| branchOpToWord JL = 0wxc
| branchOpToWord JGE = 0wxd
| branchOpToWord JLE = 0wxe
| branchOpToWord JG = 0wxf
fun branchOpRepr JO = "Overflow"
| branchOpRepr JNO = "NotOverflow"
| branchOpRepr JE = "Equal"
| branchOpRepr JNE = "NotEqual"
| branchOpRepr JL = "Less"
| branchOpRepr JGE = "GreaterOrEqual"
| branchOpRepr JLE = "LessOrEqual"
| branchOpRepr JG = "Greater"
| branchOpRepr JB = "Before"
| branchOpRepr JNB= "NotBefore"
| branchOpRepr JNA = "NotAfter"
| branchOpRepr JA = "After"
| branchOpRepr JP = "Parity"
| branchOpRepr JNP = "NoParity"
(* Invert a test. This is used if we want to change the
sense of a test from jumping if the condition is true to
jumping if it is false. *)
fun invertTest JE = JNE
| invertTest JNE = JE
| invertTest JA = JNA
| invertTest JB = JNB
| invertTest JNA = JA
| invertTest JNB = JB
| invertTest JL = JGE
| invertTest JG = JLE
| invertTest JLE = JG
| invertTest JGE = JL
| invertTest JO = JNO
| invertTest JNO = JO
| invertTest JP = JNP
| invertTest JNP = JP
datatype sse2Operations =
SSE2MoveDouble | SSE2MoveFloat | SSE2CompDouble | SSE2AddDouble |
SSE2SubDouble | SSE2MulDouble | SSE2DivDouble |
SSE2Xor | SSE2And | SSE2FloatToDouble | SSE2DoubleToFloat |
SSE2CompSingle | SSE2AddSingle | SSE2SubSingle | SSE2MulSingle | SSE2DivSingle
fun sse2OpRepr SSE2MoveDouble = "SSE2MoveDouble"
| sse2OpRepr SSE2MoveFloat = "SSE2MoveFloat"
| sse2OpRepr SSE2CompDouble = "SSE2CompDouble"
| sse2OpRepr SSE2AddDouble = "SSE2AddDouble"
| sse2OpRepr SSE2SubDouble = "SSE2SubDouble"
| sse2OpRepr SSE2MulDouble = "SSE2MulDouble"
| sse2OpRepr SSE2DivDouble = "SSE2DivDouble"
| sse2OpRepr SSE2Xor = "SSE2Xor"
| sse2OpRepr SSE2And = "SSE2And"
| sse2OpRepr SSE2CompSingle = "SSE2CompSingle"
| sse2OpRepr SSE2AddSingle = "SSE2AddSingle"
| sse2OpRepr SSE2SubSingle = "SSE2SubSingle"
| sse2OpRepr SSE2MulSingle = "SSE2MulSingle"
| sse2OpRepr SSE2DivSingle = "SSE2DivSingle"
| sse2OpRepr SSE2FloatToDouble = "SSE2FloatToDouble"
| sse2OpRepr SSE2DoubleToFloat = "SSE2DoubleToFloat"
(* Primary opCodes. N.B. only opCodes actually used are listed here.
If new instructions are added check they will be handled by the
run-time system in the event of trap.
Constructors that carry a Word8.word embed a register number or
sub-opcode in the opcode byte itself (see opToInt, e.g. PUSH_R). *)
datatype opCode =
Group1_8_A32
| Group1_8_A64
| Group1_32_A32
| Group1_32_A64
| Group1_8_a
| JMP_8
| JMP_32
| CALL_32
| MOVL_A_R32
| MOVL_A_R64
| MOVL_R_A32
| MOVL_R_A64
| MOVL_R_A16
| MOVB_R_A32
| MOVB_R_A64 of {forceRex: bool}
| PUSH_R of Word8.word
| POP_R of Word8.word
| Group5
| NOP
| LEAL32
| LEAL64
| MOVL_32_R of Word8.word
| MOVL_64_R of Word8.word
| MOVL_32_A32
| MOVL_32_A64
| MOVB_8_A
| POP_A
| RET
| RET_16
| CondJump of branchOps
| CondJump32 of branchOps
| SetCC of branchOps
| Arith32 of arithOp * Word8.word
| Arith64 of arithOp * Word8.word
| Group3_A32
| Group3_A64
| Group3_a
| Group2_8_A32
| Group2_8_A64
| Group2_CL_A32
| Group2_CL_A64
| Group2_1_A32
| Group2_1_A64
| PUSH_8
| PUSH_32
| TEST_ACC8
| LOCK_XADD32
| LOCK_XADD64
| LOCK_CMPXCHG32
| LOCK_CMPXCHG64
| FPESC of Word8.word
| XCHNG32
| XCHNG64
| REP (* Rep prefix *)
| MOVZB (* Needs escape code. *)
| MOVZW (* Needs escape code. *)
| MOVSXB32 (* Needs escape code. *)
| MOVSXW32 (* Needs escape code. *)
| MOVSXB64 (* Needs escape code. *)
| MOVSXW64 (* Needs escape code. *)
| IMUL32 (* Needs escape code. *)
| IMUL64 (* Needs escape code. *)
| SSE2StoreSingle (* movss with memory destination - needs escape sequence. *)
| SSE2StoreDouble (* movsd with memory destination - needs escape sequence. *)
| CQO_CDQ32 (* Sign extend before divide. *)
| CQO_CDQ64 (* Sign extend before divide. *)
| SSE2Ops of sse2Operations (* SSE2 instructions. *)
| CVTSI2SD32 (* 32 bit int to double *)
| CVTSI2SD64 (* 64 bit int to double *)
| CVTSI2SS32 (* 32 bit int to single *)
| CVTSI2SS64 (* 64 bit int to single *)
| HLT (* End of code marker. *)
| IMUL_C8_32
| IMUL_C8_64
| IMUL_C32_32
| IMUL_C32_64
| MOVDFromXMM (* move 32 bit value from XMM to general reg. *)
| MOVQToXMM (* move 64 bit value from general reg.to XMM *)
| PSRLDQ (* Shift XMM register *)
| LDSTMXCSR
| CVTSD2SI32 (* Double to 32-bit int *)
| CVTSD2SI64 (* Double to 64-bit int *)
| CVTSS2SI32 (* Single to 32-bit int *)
| CVTSS2SI64 (* Single to 64-bit int *)
| CVTTSD2SI32 (* Double to 32-bit int - truncate towards zero *)
| CVTTSD2SI64 (* Double to 64-bit int - truncate towards zero *)
| CVTTSS2SI32 (* Single to 32-bit int - truncate towards zero *)
| CVTTSS2SI64 (* Single to 64-bit int - truncate towards zero *)
| MOVSXD
| CMOV32 of branchOps
| CMOV64 of branchOps
| PAUSE
(* The primary opcode byte for each instruction.  Prefix and escape bytes
   are added separately by opcodePrefix/escapePrefix and the REX byte by
   rexByte, so the comments below note which are required. *)
fun opToInt Group1_8_A32 = 0wx83
| opToInt Group1_8_A64 = 0wx83
| opToInt Group1_32_A32 = 0wx81
| opToInt Group1_32_A64 = 0wx81
| opToInt Group1_8_a = 0wx80
| opToInt JMP_8 = 0wxeb
| opToInt JMP_32 = 0wxe9
| opToInt CALL_32 = 0wxe8
| opToInt MOVL_A_R32 = 0wx8b
| opToInt MOVL_A_R64 = 0wx8b
| opToInt MOVL_R_A32 = 0wx89
| opToInt MOVL_R_A64 = 0wx89
| opToInt MOVL_R_A16 = 0wx89 (* Also has an OPSIZE prefix. *)
| opToInt MOVB_R_A32 = 0wx88
| opToInt (MOVB_R_A64 _) = 0wx88
| opToInt (PUSH_R reg) = 0wx50 + reg (* Register number is embedded in the opcode. *)
| opToInt (POP_R reg) = 0wx58 + reg (* Register number is embedded in the opcode. *)
| opToInt Group5 = 0wxff
| opToInt NOP = 0wx90
| opToInt LEAL32 = 0wx8d
| opToInt LEAL64 = 0wx8d
| opToInt (MOVL_32_R reg) = 0wxb8 + reg
| opToInt (MOVL_64_R reg) = 0wxb8 + reg
| opToInt MOVL_32_A32 = 0wxc7
| opToInt MOVL_32_A64 = 0wxc7
| opToInt MOVB_8_A = 0wxc6
| opToInt POP_A = 0wx8f
| opToInt RET = 0wxc3
| opToInt RET_16 = 0wxc2
| opToInt (CondJump opc) = 0wx70 + branchOpToWord opc
| opToInt (CondJump32 opc) = 0wx80 + branchOpToWord opc (* Needs 0F prefix *)
| opToInt (SetCC opc) = 0wx90 + branchOpToWord opc (* Needs 0F prefix *)
| opToInt (Arith32 (ao,dw)) = arithOpToWord ao * 0w8 + dw
| opToInt (Arith64 (ao,dw)) = arithOpToWord ao * 0w8 + dw
| opToInt Group3_A32 = 0wxf7
| opToInt Group3_A64 = 0wxf7
| opToInt Group3_a = 0wxf6
| opToInt Group2_8_A32 = 0wxc1
| opToInt Group2_8_A64 = 0wxc1
| opToInt Group2_1_A32 = 0wxd1
| opToInt Group2_1_A64 = 0wxd1
| opToInt Group2_CL_A32 = 0wxd3
| opToInt Group2_CL_A64 = 0wxd3
| opToInt PUSH_8 = 0wx6a
| opToInt PUSH_32 = 0wx68
| opToInt TEST_ACC8 = 0wxa8
| opToInt LOCK_XADD32 = 0wxC1 (* Needs lock and escape prefixes. *)
| opToInt LOCK_XADD64 = 0wxC1 (* Needs lock and escape prefixes. *)
| opToInt LOCK_CMPXCHG32 = 0wxB1 (* Needs lock and escape prefixes. *)
| opToInt LOCK_CMPXCHG64 = 0wxB1 (* Needs lock and escape prefixes. *)
| opToInt (FPESC n) = 0wxD8 orb8 n
| opToInt XCHNG32 = 0wx87
| opToInt XCHNG64 = 0wx87
| opToInt REP = 0wxf3
| opToInt MOVZB = 0wxb6 (* Needs escape code. *)
| opToInt MOVZW = 0wxb7 (* Needs escape code. *)
| opToInt MOVSXB32 = 0wxbe (* Needs escape code. *)
| opToInt MOVSXW32 = 0wxbf (* Needs escape code. *)
| opToInt MOVSXB64 = 0wxbe (* Needs escape code. *)
| opToInt MOVSXW64 = 0wxbf (* Needs escape code. *)
| opToInt IMUL32 = 0wxaf (* Needs escape code. *)
| opToInt IMUL64 = 0wxaf (* Needs escape code. *)
| opToInt SSE2StoreSingle = 0wx11 (* Needs F3 0F escape. *)
| opToInt SSE2StoreDouble = 0wx11 (* Needs F2 0F escape. *)
| opToInt CQO_CDQ32 = 0wx99
| opToInt CQO_CDQ64 = 0wx99
| opToInt (SSE2Ops SSE2MoveDouble) = 0wx10 (* Needs F2 0F escape. *)
| opToInt (SSE2Ops SSE2MoveFloat) = 0wx10 (* Needs F3 0F escape. *)
| opToInt (SSE2Ops SSE2CompDouble) = 0wx2E (* Needs 66 0F escape. *)
| opToInt (SSE2Ops SSE2AddDouble) = 0wx58 (* Needs F2 0F escape. *)
| opToInt (SSE2Ops SSE2SubDouble) = 0wx5c (* Needs F2 0F escape. *)
| opToInt (SSE2Ops SSE2MulDouble) = 0wx59 (* Needs F2 0F escape. *)
| opToInt (SSE2Ops SSE2DivDouble) = 0wx5e (* Needs F2 0F escape. *)
| opToInt (SSE2Ops SSE2CompSingle) = 0wx2E (* Needs 0F escape. *)
| opToInt (SSE2Ops SSE2AddSingle) = 0wx58 (* Needs F3 0F escape. *)
| opToInt (SSE2Ops SSE2SubSingle) = 0wx5c (* Needs F3 0F escape. *)
| opToInt (SSE2Ops SSE2MulSingle) = 0wx59 (* Needs F3 0F escape. *)
| opToInt (SSE2Ops SSE2DivSingle) = 0wx5e (* Needs F3 0F escape. *)
| opToInt (SSE2Ops SSE2And) = 0wx54 (* Needs 66 0F escape. *)
| opToInt (SSE2Ops SSE2Xor) = 0wx57 (* Needs 66 0F escape. *)
| opToInt (SSE2Ops SSE2FloatToDouble) = 0wx5A (* Needs F3 0F escape. *)
| opToInt (SSE2Ops SSE2DoubleToFloat) = 0wx5A (* Needs F2 0F escape. *)
| opToInt CVTSI2SD32 = 0wx2a (* Needs F2 0F escape. *)
| opToInt CVTSI2SD64 = 0wx2a (* Needs F2 0F escape. *)
| opToInt CVTSI2SS32 = 0wx2a (* Needs F3 0F escape. *)
| opToInt CVTSI2SS64 = 0wx2a (* Needs F3 0F escape. *)
| opToInt HLT = 0wxf4
| opToInt IMUL_C8_32 = 0wx6b
| opToInt IMUL_C8_64 = 0wx6b
| opToInt IMUL_C32_32 = 0wx69
| opToInt IMUL_C32_64 = 0wx69
| opToInt MOVDFromXMM = 0wx7e (* Needs 66 0F escape. *)
| opToInt MOVQToXMM = 0wx6e (* Needs 66 0F escape. *)
| opToInt PSRLDQ = 0wx73 (* Needs 66 0F escape. *)
| opToInt LDSTMXCSR = 0wxae (* Needs 0F prefix. *)
| opToInt CVTSD2SI32 = 0wx2d (* Needs F2 0F prefix. *)
| opToInt CVTSD2SI64 = 0wx2d (* Needs F2 0F prefix and rex.w. *)
| opToInt CVTSS2SI32 = 0wx2d (* Needs F3 0F prefix. *)
| opToInt CVTSS2SI64 = 0wx2d (* Needs F3 0F prefix and rex.w. *)
| opToInt CVTTSD2SI32 = 0wx2c (* Needs F2 0F prefix. *)
| opToInt CVTTSD2SI64 = 0wx2c (* Needs F2 0F prefix and rex.w. *)
| opToInt CVTTSS2SI32 = 0wx2c (* Needs F3 0F prefix. *)
| opToInt CVTTSS2SI64 = 0wx2c (* Needs F3 0F prefix and rex.w. *)
| opToInt MOVSXD = 0wx63
| opToInt (CMOV32 opc) = 0wx40 + branchOpToWord opc (* Needs 0F prefix *)
| opToInt (CMOV64 opc) = 0wx40 + branchOpToWord opc (* Needs 0F prefix and rex.w *)
| opToInt PAUSE = 0wx90 (* Needs F3 prefix *)
(* The "mod" field of the mod-r/m byte: selects the addressing form.
   The displacement widths correspond to the disp field emitted in
   opIndexedGen below. *)
datatype mode =
Based0 (* mod = 0: no displacement (or, with r/m=5, PC-relative/absolute) *)
| Based8 (* mod = 1: 8-bit signed displacement *)
| Based32 (* mod = 2: 32-bit displacement *)
| Register (* mod = 3: register direct *) ;
(* Put together the three fields which make up the mod r/m byte:
   mod (2 bits), reg (3 bits) and r/m (3 bits).  Register numbers above
   seven must have been reduced already - their extension bit belongs in
   the REX prefix, not here. *)
fun modrm (md: mode, rg: Word8.word, rm: Word8.word): Word8.word =
let
    fun check3Bits (v, msg) = if v > 0w7 then raise InternalError msg else ()
    val () = check3Bits (rg, "modrm: bad rg")
    val () = check3Bits (rm, "modrm: bad rm")
    val modField: Word8.word =
        case md of
            Based0 => 0w0
        |   Based8 => 0w1
        |   Based32 => 0w2
        |   Register => 0w3
in
    (modField <<- 0w6) orb8 (rg <<- 0w3) orb8 rm
end
(* REX prefix: base value 0x40 with the W (64-bit operand), R (reg-field
   extension), X (index extension) and B (base/r-m extension) bits or-ed in. *)
fun rex {w,r,x,b} =
0wx40 orb8 (if w then 0w8 else 0w0) orb8 (if r then 0w4 else 0w0) orb8
(if x then 0w2 else 0w0) orb8 (if b then 0w1 else 0w0)
(* The X86 has the option to include an index register and to scale it.
   The numeric suffix is the scale factor applied to the index register
   (encoded as the two-bit scale field of the SIB byte). *)
datatype indexType =
NoIndex | Index1 of genReg | Index2 of genReg | Index4 of genReg | Index8 of genReg
(* Lock, Opsize and REPNE prefixes come before the REX.
   0wxf0 = LOCK, 0wx66 = operand-size override, and 0wxf2/0wxf3 select the
   operand type (double/single) for the SSE2 instructions. *)
fun opcodePrefix LOCK_XADD32 = [0wxF0] (* Requires LOCK prefix. *)
| opcodePrefix LOCK_XADD64 = [0wxF0] (* Requires LOCK prefix. *)
| opcodePrefix LOCK_CMPXCHG32 = [0wxF0] (* Requires LOCK prefix. *)
| opcodePrefix LOCK_CMPXCHG64 = [0wxF0] (* Requires LOCK prefix. *)
| opcodePrefix MOVL_R_A16 = [0wx66] (* Requires OPSIZE prefix. *)
| opcodePrefix SSE2StoreSingle = [0wxf3]
| opcodePrefix SSE2StoreDouble = [0wxf2]
| opcodePrefix(SSE2Ops SSE2CompDouble) = [0wx66]
| opcodePrefix(SSE2Ops SSE2And) = [0wx66]
| opcodePrefix(SSE2Ops SSE2Xor) = [0wx66]
| opcodePrefix(SSE2Ops SSE2CompSingle) = [] (* No prefix *)
| opcodePrefix(SSE2Ops SSE2MoveDouble) = [0wxf2]
| opcodePrefix(SSE2Ops SSE2AddDouble) = [0wxf2]
| opcodePrefix(SSE2Ops SSE2SubDouble) = [0wxf2]
| opcodePrefix(SSE2Ops SSE2MulDouble) = [0wxf2]
| opcodePrefix(SSE2Ops SSE2DivDouble) = [0wxf2]
| opcodePrefix(SSE2Ops SSE2DoubleToFloat) = [0wxf2]
| opcodePrefix(SSE2Ops SSE2MoveFloat) = [0wxf3]
| opcodePrefix(SSE2Ops SSE2AddSingle) = [0wxf3]
| opcodePrefix(SSE2Ops SSE2SubSingle) = [0wxf3]
| opcodePrefix(SSE2Ops SSE2MulSingle) = [0wxf3]
| opcodePrefix(SSE2Ops SSE2DivSingle) = [0wxf3]
| opcodePrefix(SSE2Ops SSE2FloatToDouble) = [0wxf3]
| opcodePrefix CVTSI2SD32 = [0wxf2]
| opcodePrefix CVTSI2SD64 = [0wxf2]
| opcodePrefix CVTSI2SS32 = [0wxf3]
| opcodePrefix CVTSI2SS64 = [0wxf3]
| opcodePrefix MOVDFromXMM = [0wx66]
| opcodePrefix MOVQToXMM = [0wx66]
| opcodePrefix PSRLDQ = [0wx66]
| opcodePrefix CVTSD2SI32 = [0wxf2]
| opcodePrefix CVTSD2SI64 = [0wxf2]
| opcodePrefix CVTSS2SI32 = [0wxf3]
| opcodePrefix CVTSS2SI64 = [0wxf3]
| opcodePrefix CVTTSD2SI32 = [0wxf2]
| opcodePrefix CVTTSD2SI64 = [0wxf2]
| opcodePrefix CVTTSS2SI32 = [0wxf3]
| opcodePrefix CVTTSS2SI64 = [0wxf3]
| opcodePrefix PAUSE = [0wxf3]
| opcodePrefix _ = [] (* Most instructions need no prefix. *)
(* A few instructions require an escape. Escapes come after the REX.
   The 0wx0f byte selects the two-byte opcode map. *)
fun escapePrefix MOVZB = [0wx0f]
| escapePrefix MOVZW = [0wx0f]
| escapePrefix MOVSXB32 = [0wx0f]
| escapePrefix MOVSXW32 = [0wx0f]
| escapePrefix MOVSXB64 = [0wx0f]
| escapePrefix MOVSXW64 = [0wx0f]
| escapePrefix LOCK_XADD32 = [0wx0f]
| escapePrefix LOCK_XADD64 = [0wx0f]
| escapePrefix LOCK_CMPXCHG32 = [0wx0f]
| escapePrefix LOCK_CMPXCHG64 = [0wx0f]
| escapePrefix IMUL32 = [0wx0f]
| escapePrefix IMUL64 = [0wx0f]
| escapePrefix(CondJump32 _) = [0wx0f]
| escapePrefix(SetCC _) = [0wx0f]
| escapePrefix SSE2StoreSingle = [0wx0f]
| escapePrefix SSE2StoreDouble = [0wx0f]
| escapePrefix(SSE2Ops _) = [0wx0f]
| escapePrefix CVTSI2SD32 = [0wx0f]
| escapePrefix CVTSI2SD64 = [0wx0f]
| escapePrefix CVTSI2SS32 = [0wx0f]
| escapePrefix CVTSI2SS64 = [0wx0f]
| escapePrefix MOVDFromXMM = [0wx0f]
| escapePrefix MOVQToXMM = [0wx0f]
| escapePrefix PSRLDQ = [0wx0f]
| escapePrefix LDSTMXCSR = [0wx0f]
| escapePrefix CVTSD2SI32 = [0wx0f]
| escapePrefix CVTSD2SI64 = [0wx0f]
| escapePrefix CVTSS2SI32 = [0wx0f]
| escapePrefix CVTSS2SI64 = [0wx0f]
| escapePrefix CVTTSD2SI32 = [0wx0f]
| escapePrefix CVTTSD2SI64 = [0wx0f]
| escapePrefix CVTTSS2SI32 = [0wx0f]
| escapePrefix CVTTSS2SI64 = [0wx0f]
| escapePrefix(CMOV32 _) = [0wx0f]
| escapePrefix(CMOV64 _) = [0wx0f]
| escapePrefix _ = [] (* Single-byte opcodes need no escape. *)
(* Generate the bytes for a bare opcode: any prefix bytes, then an
   optional REX byte, then any 0F escape, then the opcode itself. *)
fun opCodeBytes(opb: opCode, rx) =
let
    (* A REX prefix can only be emitted when running on X86/64. *)
    fun rexPart NONE = []
    |   rexPart (SOME flags) =
            if hostIsX64 then [rex flags]
            else raise InternalError "opCodeBytes: rex prefix in 32 bit mode"
in
    opcodePrefix opb @ rexPart rx @ escapePrefix opb @ [opToInt opb]
end
(* Compute the (possibly empty) REX prefix for an instruction.  rrX, rbX
   and riX are true when the reg, base or index register respectively
   needs its REX extension bit (r/b/x) set. *)
fun rexByte(opb, rrX, rbX, riX) =
let
(* We need a rex prefix if we need to set the length to 64-bit. *)
val need64bit =
case opb of
Group1_8_A64 => true (* Arithmetic operations - must be 64-bit *)
| Group1_32_A64 => true (* Arithmetic operations - must be 64-bit *)
| Group2_1_A64 => true (* 1-bit shifts - must be 64-bit *)
| Group2_8_A64 => true (* n-bit shifts - must be 64-bit *)
| Group2_CL_A64 => true (* Shifts by value in CL *)
| Group3_A64 => true (* Test, Not, Mul etc. *)
| Arith64 (_, _) => true
| MOVL_A_R64 => true (* Needed *)
| MOVL_R_A64 => true (* Needed *)
| XCHNG64 => true
| LEAL64 => true (* Needed to ensure the result is 64-bits *)
| MOVL_64_R _ => true (* Needed *)
| MOVL_32_A64 => true (* Needed *)
| IMUL64 => true (* Needed to ensure the result is 64-bits *)
| LOCK_XADD64 => true (* Needed to ensure the result is 64-bits *)
| LOCK_CMPXCHG64 => true (* Needed to ensure the result is 64-bits *)
| CQO_CDQ64 => true (* It's only CQO if there's a Rex prefix. *)
| CVTSI2SD64 => true (* This affects the size of the integer source. *)
| CVTSI2SS64 => true (* This affects the size of the integer source. *)
| IMUL_C8_64 => true
| IMUL_C32_64 => true
| MOVQToXMM => true
| CVTSD2SI64 => true (* This affects the size of the integer source. *)
| CVTSS2SI64 => true
| CVTTSD2SI64 => true
| CVTTSS2SI64 => true
| MOVSXD => true
| CMOV64 _ => true
| MOVSXB64 => true
| MOVSXW64 => true
(* Group5 - We only use 2/4/6 and they don't need prefix *)
| _ => false
(* If we are using MOVB_R_A with SIL or DIL we need to force a REX prefix.
That's only possible in 64-bit mode. This also applies with Test and SetCC
but they are dealt with elsewhere. *)
val forceRex =
case opb of
MOVB_R_A64 {forceRex=true} => true (* This is allowed in X86/64 but not in X86/32. *)
| _ => false
in
(* Emit the byte only when some bit is actually needed. *)
if need64bit orelse rrX orelse rbX orelse riX orelse forceRex
then [rex{w=need64bit, r=rrX, b=rbX, x = riX}]
else []
end
(* Register/register operation: prefixes, optional REX, optional escape,
   the opcode byte and a mod-r/m byte with mod = 3 (register direct). *)
fun opReg(opb: opCode, (*dest*)GeneralReg(destC, destX), (*source*)GeneralReg(srcC, srcX)) =
    opcodePrefix opb                    (* Any opsize or lock prefix. *)
    @ rexByte(opb, destX, srcX, false)
    @ escapePrefix opb                  (* The ESCAPE code if needed. *)
    @ [opToInt opb, modrm(Register, destC, srcC)]
(* Operation on a single register where the "reg" field of the mod-r/m
   byte is actually an operation code rather than a register.  Note: no
   escape byte is emitted for these opcodes. *)
fun opRegPlus2(opb: opCode, rd: genReg, op2: Word8.word) =
let
    val (regC, regX) = getReg rd
in
    opcodePrefix opb                    (* Any opsize or lock prefix. *)
    @ rexByte(opb, false, regX, false)
    @ [opToInt opb, modrm(Register, op2, regC)]
end
local
(* General instruction form with modrm and optional sib bytes. rb is an option since the
base register may be omitted. This is used with LEA to tag integers.
(rrC, rrX) is the code and extension bit for the value in the mod-r/m
"reg" field, which may be a register or a sub-opcode. *)
fun opIndexedGen (opb:opCode, offset: LargeInt.int, rb: genReg option, ri: indexType, (rrC, rrX)) =
let
(* Base encoding. (Based0, 0w5) means "no base" so if we need ebp as the
base we have to use Based8 at least. *)
val (offsetCode, rbC, rbX) =
case rb of
NONE => (Based0, 0w5 (* no base register *), false)
| SOME rb =>
let
val (rbC, rbX) = getReg rb
val base =
if offset = 0 andalso rbC <> 0wx5 (* Can't use ebp with Based0 *)
then Based0 (* no disp field *)
else if is8BitL offset
then Based8 (* use 8-bit disp field *)
else Based32 (* use 32-bit disp field *)
in
(base, rbC, rbX)
end
(* Index coding. esp can't be used as an index so (0w4, false) means "no index".
But r12 (0w4, true) CAN be. *)
val ((riC, riX), scaleFactor) =
case ri of
NoIndex => ((0w4, false), 0w0)
| Index1 i => (getReg i, 0w0)
| Index2 i => (getReg i, 0w1)
| Index4 i => (getReg i, 0w2)
| Index8 i => (getReg i, 0w3)
(* If the base register is esp or r12 we have to use a sib byte even if
there's no index. That's because 0w4 as a base register means "there's
a SIB byte". *)
val modRmAndOptionalSib =
if rbC = 0w4 (* Code for esp and r12 *) orelse riC <> 0w4 orelse riX
then
let
val mdrm = modrm(offsetCode, rrC, 0w4 (* s-i-b *))
val sibByte = (scaleFactor <<- 0w6) orb8 (riC <<- 0w3) orb8 rbC
in
[mdrm, sibByte]
end
else [modrm(offsetCode, rrC, rbC)]
(* Generate the disp field (if any) *)
val dispField =
case (offsetCode, rb) of
(Based8, _) => [Word8.fromLargeInt offset]
| (Based32, _) => int32Signed offset
| (_, NONE) => (* 32 bit absolute used as base *) int32Signed offset
| _ => []
in
opcodePrefix opb @ rexByte(opb, rrX, rbX, riX) @ escapePrefix opb @
opToInt opb :: modRmAndOptionalSib @ dispField
end
in
(* Effective-address form: base register plus offset, no index. *)
fun opEA(opb, offset, rb, r) = opIndexedGen(opb, offset, SOME rb, NoIndex, getReg r)
(* Generate an opcode plus a second modrm byte but where the "register" field in
the modrm byte is actually a code. *)
and opPlus2(opb, offset, rb, op2) = opIndexedGen(opb, offset, SOME rb, NoIndex, (op2, false))
and opIndexedPlus2(opb, offset, rb, ri, op2) = opIndexedGen(opb, offset, SOME rb, ri, (op2, false))
fun opIndexed (opb, offset, rb, ri, rd) =
opIndexedGen(opb, offset, rb, ri, getReg rd)
fun opAddress(opb, offset, rb, ri, rd) = opIndexedGen (opb, offset, SOME rb, ri, getReg rd)
(* As opAddress but the reg field holds an XMM register number. *)
and mMXAddress(opb, offset, rb, ri, SSE2Reg rrC) = opIndexedGen(opb, offset, SOME rb, ri, (rrC, false))
and opAddressPlus2(opb, offset, rb, ri, op2) =
opIndexedGen(opb, offset, SOME rb, ri, (op2, false))
end
(* An operation with an operand that needs to go in the constant area or,
   on native 32-bit, in an object whose address is inline.  This emits the
   instruction with a PC-relative/absolute mod-r/m byte and a placeholder
   32-bit field; the details of the constant are dealt with in putConst. *)
fun opConstantOperand(opb, (*dest*)GeneralReg(destC, destX)) =
    opcodePrefix opb                    (* Any opsize or lock prefix. *)
    @ rexByte(opb, destX, false, false)
    @ escapePrefix opb                  (* The ESCAPE code if needed. *)
    @ [opToInt opb, modrm(Based0, destC, 0w5 (* PC-relative or absolute *))]
    @ int32Signed(tag 0)
(* Arithmetic operation with a constant operand, using the shortest
   encoding the value allows: an 8-bit immediate, a 32-bit immediate,
   or a reference into the non-address constant area. *)
fun immediateOperand (opn: arithOp, rd: genReg, imm: LargeInt.int, opSize) =
let
    fun sized (op64, op32) = case opSize of OpSize64 => op64 | OpSize32 => op32
in
    if is8BitL imm
    then (* One byte immediate. *)
        opRegPlus2(sized (Group1_8_A64, Group1_8_A32), rd, arithOpToWord opn) @ [Word8.fromLargeInt imm]
    else if is32bit imm
    then (* 32 bit immediate. *)
        opRegPlus2(sized (Group1_32_A64, Group1_32_A32), rd, arithOpToWord opn) @ int32Signed imm
    else (* It won't fit in the immediate; put it in the non-address area. *)
        opConstantOperand(sized (Arith64, Arith32) (opn, 0w3 (* r/m to reg *)), rd)
end
(* Register-to-register arithmetic at the requested width. *)
fun arithOpReg(opn: arithOp, rd: genReg, rs: genReg, opIs64) =
let
    val mkArith = if opIs64 then Arith64 else Arith32
in
    opReg (mkArith (opn, 0w3 (* r/m to reg *)), rd, rs)
end
(* Mutable address slot used for handler labels (name suggests exception
   handlers - confirm against the uses later in the file). *)
type handlerLab = addrs ref
(* X87 floating point operation: an FP escape opcode followed by a
   mod-r/m-style byte built from the md, nnn and rm fields. *)
fun floatingPtOp {escape, md, nnn, rm} =
let
    val modRegRm = (md <<- 0w6) orb8 (nnn <<- 0w3) orb8 rm
in
    opCodeBytes(FPESC escape, NONE) @ [modRegRm]
end
(* Entries into the run-time system, selected in rtsCall below via the
   corresponding memReg... locations. *)
datatype trapEntries =
StackOverflowCall
| StackOverflowCallEx
| HeapOverflowCall
(* RTS call. We need to save any registers that may contain addresses to the stack.
All the registers are preserved but not seen by the GC.
regSet is encoded as a bit mask (bit = nReg of the register) and emitted
in the bytes following the call.  NOTE(review): these trailing bytes are
never executed as the instructions they resemble - they appear to be
decoded by the run-time system's trap handler; confirm against the RTS. *)
fun rtsCall(rtsEntry, regSet) =
let
val entry =
case rtsEntry of
StackOverflowCall => memRegStackOverflowCall
| StackOverflowCallEx => memRegStackOverflowCallEx
| HeapOverflowCall => memRegHeapOverflowCall
(* Fold the register list into a bit mask. *)
val regSet = List.foldl(fn (r, a) => (0w1 << Word.fromInt(nReg(GenReg r))) orb a) 0w0 regSet
(* Indirect call through the entry in the memory registers (ebp-relative). *)
val callInstr =
opPlus2(Group5, LargeInt.fromInt entry, ebp, 0w2 (* call *))
val regSetInstr =
if regSet >= 0w256
then [0wxca, (* This is actually a FAR RETURN *)
wordToWord8 regSet, (* Low byte*) wordToWord8 (regSet >> 0w8) (* High byte*)]
else if regSet <> 0w0
then [0wxcd, (* This is actually INT n *) wordToWord8 regSet]
else []
in
callInstr @ regSetInstr
end
(* Operations. *)
(* Word value paired with a label (name suggests jump-table cases). *)
type cases = word * label
(* An effective address: base register, byte offset and optional scaled index. *)
type memoryAddress = { base: genReg, offset: int, index: indexType }
(* An operand that may be a register, a memory location, an inline
   non-address constant or an address constant. *)
datatype 'reg regOrMemoryArg =
RegisterArg of 'reg
| MemoryArg of memoryAddress
| NonAddressConstArg of LargeInt.int
| AddressConstArg of machineWord
(* Widths for Move.  MoveN is an N-bit move; the NXM forms move an N-bit
   value into an M-bit destination with extension - TODO confirm sign
   versus zero extension against the code that assembles Move. *)
datatype moveSize =
Move64 | Move32 | Move8 | Move16 | Move32X64 | Move8X32 | Move8X64 | Move16X32 | Move16X64
and fpSize = SinglePrecision | DoublePrecision
datatype operation =
Move of { source: genReg regOrMemoryArg, destination: genReg regOrMemoryArg, moveSize: moveSize }
| PushToStack of genReg regOrMemoryArg
| PopR of genReg
| ArithToGenReg of { opc: arithOp, output: genReg, source: genReg regOrMemoryArg, opSize: opSize }
| ArithMemConst of { opc: arithOp, address: memoryAddress, source: LargeInt.int, opSize: opSize }
| ArithMemLongConst of { opc: arithOp, address: memoryAddress, source: machineWord }
| ArithByteMemConst of { opc: arithOp, address: memoryAddress, source: Word8.word }
| ShiftConstant of { shiftType: shiftType, output: genReg, shift: Word8.word, opSize: opSize }
| ShiftVariable of { shiftType: shiftType, output: genReg, opSize: opSize } (* Shift amount is in ecx *)
| ConditionalBranch of { test: branchOps, label: label }
| SetCondition of { output: genReg, test: branchOps }
| LoadAddress of { output: genReg, offset: int, base: genReg option, index: indexType, opSize: opSize }
| TestByteBits of { arg: genReg regOrMemoryArg, bits: Word8.word }
| CallRTS of {rtsEntry: trapEntries, saveRegs: genReg list }
| AllocStore of { size: int, output: genReg, saveRegs: genReg list }
| AllocStoreVariable of { size: genReg, output: genReg, saveRegs: genReg list }
| StoreInitialised
| CallAddress of genReg regOrMemoryArg
| JumpAddress of genReg regOrMemoryArg
| ReturnFromFunction of int
| RaiseException of { workReg: genReg }
| UncondBranch of label
| ResetStack of { numWords: int, preserveCC: bool }
| JumpLabel of label
| LoadLabelAddress of { label: label, output: genReg }
| RepeatOperation of repOps
| DivideAccR of {arg: genReg, isSigned: bool, opSize: opSize }
| DivideAccM of {base: genReg, offset: int, isSigned: bool, opSize: opSize }
| AtomicXAdd of {address: memoryAddress, output: genReg, opSize: opSize }
| AtomicCmpXChng of {address: memoryAddress, source: genReg, opSize: opSize }
| FPLoadFromMemory of { address: memoryAddress, precision: fpSize }
| FPLoadFromFPReg of { source: fpReg, lastRef: bool }
| FPLoadFromConst of { constant: machineWord, precision: fpSize }
| FPStoreToFPReg of { output: fpReg, andPop: bool }
| FPStoreToMemory of { address: memoryAddress, precision: fpSize, andPop: bool }
| FPArithR of { opc: fpOps, source: fpReg }
| FPArithConst of { opc: fpOps, source: machineWord, precision: fpSize }
| FPArithMemory of { opc: fpOps, base: genReg, offset: int, precision: fpSize }
| FPUnary of fpUnaryOps
| FPStatusToEAX
| FPLoadInt of { base: genReg, offset: int, opSize: opSize }
| FPFree of fpReg
| MultiplyR of { source: genReg regOrMemoryArg, output: genReg, opSize: opSize }
| XMMArith of { opc: sse2Operations, source: xmmReg regOrMemoryArg, output: xmmReg }
| XMMStoreToMemory of { toStore: xmmReg, address: memoryAddress, precision: fpSize }
| XMMConvertFromInt of { source: genReg, output: xmmReg, opSize: opSize, precision: fpSize }
| SignExtendForDivide of opSize
| XChng of { reg: genReg, arg: genReg regOrMemoryArg, opSize: opSize }
| Negative of { output: genReg, opSize: opSize }
| JumpTable of { cases: label list, jumpSize: jumpSize ref }
| IndexedJumpCalc of { addrReg: genReg, indexReg: genReg, jumpSize: jumpSize ref }
| MoveXMMRegToGenReg of { source: xmmReg, output: genReg }
| MoveGenRegToXMMReg of { source: genReg, output: xmmReg }
| XMMShiftRight of { output: xmmReg, shift: Word8.word }
| FPLoadCtrlWord of memoryAddress (* Load FP control word. *)
| FPStoreCtrlWord of memoryAddress (* Store FP control word. *)
| XMMLoadCSR of memoryAddress (* Load combined control/status word. *)
| XMMStoreCSR of memoryAddress (* Store combined control/status word. *)
| FPStoreInt of memoryAddress
| XMMStoreInt of { source: xmmReg regOrMemoryArg, output: genReg, precision: fpSize, isTruncate: bool }
| CondMove of { test: branchOps, output: genReg, source: genReg regOrMemoryArg, opSize: opSize }
| LoadAbsolute of { destination: genReg, value: machineWord }
| PauseForSpinLock
and jumpSize = JumpSize2 | JumpSize8
(* The code for a procedure is represented as a list of operations. *)
type operations = operation list
(* Print a single [operation] in symbolic form on [stream].  This produces
   the low-level code listing used for debugging the code-generator.
   Every operation is terminated with a newline; JumpTable additionally
   prints one line per case.
   Fixes: separator spaces are now printed consistently - FPLoadS gained
   the trailing space its FPLoadD sibling already had, and AtomicXAdd,
   AtomicCmpXChng and CondMove now separate the size suffix from the
   following operand. *)
fun printOperation(operation, stream) =
let
    (* General registers are always shown at the full native width. *)
    fun printGReg r = stream(genRegRepr(r, sz32_64))
    val printFPReg = stream o fpRegRepr
    and printXMMReg = stream o xmmRegRepr

    (* Print a memory operand as "offset(base)[index*scale]". *)
    fun printBaseOffset(b, x, i) =
    (
        stream(Int.toString i); stream "("; printGReg b; stream ")";
        case x of
            NoIndex => ()
        |   Index1 x => (stream "["; printGReg x; stream "]")
        |   Index2 x => (stream "["; printGReg x; stream "*2]")
        |   Index4 x => (stream "["; printGReg x; stream "*4]")
        |   Index8 x => (stream "["; printGReg x; stream "*8]")
    )

    fun printMemAddress({ base, offset, index }) = printBaseOffset(base, index, offset)

    (* Print a register/memory/constant operand.  [printReg] supplies the
       register printer so this works for both general and XMM registers. *)
    fun printRegOrMemoryArg printReg (RegisterArg r) = printReg r
    |   printRegOrMemoryArg _ (MemoryArg{ base, offset, index }) = printBaseOffset(base, index, offset)
    |   printRegOrMemoryArg _ (NonAddressConstArg c) = stream(LargeInt.toString c)
    |   printRegOrMemoryArg _ (AddressConstArg c) = stream(Address.stringOfWord c)

    fun printOpSize OpSize32 = "32"
    |   printOpSize OpSize64 = "64"
in
    case operation of
        Move { source, destination, moveSize } =>
        (
            case moveSize of
                Move64 => stream "Move64 "
            |   Move32 => stream "Move32 "
            |   Move8 => stream "Move8 "
            |   Move16 => stream "Move16 "
            |   Move32X64 => stream "Move32X64 "
            |   Move8X32 => stream "Move8X32 "
            |   Move8X64 => stream "Move8X64 "
            |   Move16X32 => stream "Move16X32 "
            |   Move16X64 => stream "Move16X64 ";
            printRegOrMemoryArg printGReg destination; stream " <= "; printRegOrMemoryArg printGReg source
        )
    |   ArithToGenReg { opc, output, source, opSize } =>
            (stream (arithOpRepr opc); stream "RR"; stream(printOpSize opSize); stream " "; printGReg output; stream " <= "; printRegOrMemoryArg printGReg source )
    |   ArithMemConst { opc, address, source, opSize } =>
        (
            stream (arithOpRepr opc); stream "MC"; stream(printOpSize opSize); stream " ";
            printMemAddress address;
            stream " "; stream(LargeInt.toString source)
        )
    |   ArithMemLongConst { opc, address, source } =>
        (
            stream (arithOpRepr opc ^ "MC "); printMemAddress address;
            stream " <= "; stream(Address.stringOfWord source)
        )
    |   ArithByteMemConst { opc, address, source } =>
        (
            stream (arithOpRepr opc); stream "MC8"; stream " ";
            printMemAddress address; stream " "; stream(Word8.toString source)
        )
    |   ShiftConstant { shiftType, output, shift, opSize } =>
        (
            stream(shiftTypeRepr shiftType); stream(printOpSize opSize); stream " "; printGReg output;
            stream " by "; stream(Word8.toString shift)
        )
    |   ShiftVariable { shiftType, output, opSize } => (* Shift amount is in ecx *)
        (
            stream(shiftTypeRepr shiftType); stream(printOpSize opSize); stream " "; printGReg output; stream " by ECX"
        )
    |   ConditionalBranch { test, label=Label{labelNo, ...} } =>
        (
            stream "Jump"; stream(branchOpRepr test); stream " L"; stream(Int.toString labelNo)
        )
    |   SetCondition { output, test } =>
        (
            stream "SetCC"; stream(branchOpRepr test); stream " => "; printGReg output
        )
    |   PushToStack source => (stream "Push "; printRegOrMemoryArg printGReg source)
    |   PopR dest => (stream "PopR "; printGReg dest)
    |   LoadAddress{ output, offset, base, index, opSize } =>
        (
            stream "LoadAddress"; stream(printOpSize opSize); stream " ";
            case base of NONE => () | SOME r => (printGReg r; stream " + ");
            stream(Int.toString offset);
            case index of
                NoIndex => ()
            |   Index1 x => (stream " + "; printGReg x)
            |   Index2 x => (stream " + "; printGReg x; stream "*2 ")
            |   Index4 x => (stream " + "; printGReg x; stream "*4 ")
            |   Index8 x => (stream " + "; printGReg x; stream "*8 ");
            stream " => "; printGReg output
        )
    |   TestByteBits { arg, bits } =>
            ( stream "TestByteBits "; printRegOrMemoryArg printGReg arg; stream " 0x"; stream(Word8.toString bits) )
    |   CallRTS {rtsEntry, ...} =>
        (
            stream "CallRTS ";
            case rtsEntry of
                StackOverflowCall => stream "StackOverflowCall"
            |   HeapOverflowCall => stream "HeapOverflow"
            |   StackOverflowCallEx => stream "StackOverflowCallEx"
        )
    |   AllocStore { size, output, ... } =>
            (stream "AllocStore "; stream(Int.toString size); stream " => "; printGReg output )
    |   AllocStoreVariable { output, size, ...} =>
            (stream "AllocStoreVariable "; printGReg size; stream " => "; printGReg output )
    |   StoreInitialised => stream "StoreInitialised"
    |   CallAddress source => (stream "CallAddress "; printRegOrMemoryArg printGReg source)
    |   JumpAddress source => (stream "JumpAddress "; printRegOrMemoryArg printGReg source)
    |   ReturnFromFunction argsToRemove =>
            (stream "ReturnFromFunction "; stream(Int.toString argsToRemove))
    |   RaiseException { workReg } => (stream "RaiseException "; printGReg workReg)
    |   UncondBranch(Label{labelNo, ...})=>
            (stream "UncondBranch L"; stream(Int.toString labelNo))
    |   ResetStack{numWords, preserveCC} =>
            (stream "ResetStack "; stream(Int.toString numWords); if preserveCC then stream " preserve CC" else ())
    |   JumpLabel(Label{labelNo, ...}) =>
            (stream "L"; stream(Int.toString labelNo); stream ":")
    |   LoadLabelAddress{ label=Label{labelNo, ...}, output } =>
            (stream "LoadLabelAddress L"; stream(Int.toString labelNo); stream "=>"; printGReg output)
    |   RepeatOperation repOp => (stream "Repeat "; stream(repOpsRepr repOp))
    |   DivideAccR{arg, isSigned, opSize} =>
            ( stream(if isSigned then "DivideSigned" else "DivideUnsigned"); stream(printOpSize opSize); stream " "; printGReg arg)
    |   DivideAccM{base, offset, isSigned, opSize} =>
            ( stream(if isSigned then "DivideSigned" else "DivideUnsigned"); stream(printOpSize opSize); stream " "; printBaseOffset(base, NoIndex, offset))
    |   AtomicXAdd{address, output, opSize} =>
            (* Fixed: separate the size suffix from the address. *)
            (stream "LockedXAdd"; stream(printOpSize opSize); stream " "; printMemAddress address; stream " <=> "; printGReg output)
    |   AtomicCmpXChng{address, source, opSize} =>
            (* Fixed: separate the size suffix from the address. *)
            (stream "LockedCmpXchng"; stream(printOpSize opSize); stream " "; printMemAddress address; stream ", "; printGReg source)
    |   FPLoadFromMemory{address, precision=DoublePrecision} => (stream "FPLoadDouble "; printMemAddress address)
    |   FPLoadFromMemory{address, precision=SinglePrecision} => (stream "FPLoadSingle "; printMemAddress address)
    |   FPLoadFromFPReg {source, lastRef} =>
            (stream "FPLoad "; printFPReg source; if lastRef then stream " (LAST)" else())
    |   FPLoadFromConst{constant, precision} =>
        (
            (* Fixed: "FPLoadS " now has a trailing space like "FPLoadD ". *)
            case precision of DoublePrecision => stream "FPLoadD " | SinglePrecision => stream "FPLoadS ";
            stream(Address.stringOfWord constant)
        )
    |   FPStoreToFPReg{ output, andPop } =>
            (if andPop then stream "FPStoreAndPop => " else stream "FPStore => "; printFPReg output)
    |   FPStoreToMemory{ address, precision=DoublePrecision, andPop } =>
        (
            if andPop then stream "FPStoreDoubleAndPop => " else stream "FPStoreDouble => ";
            printMemAddress address
        )
    |   FPStoreToMemory{ address, precision=SinglePrecision, andPop } =>
        (
            if andPop then stream "FPStoreSingleAndPop => " else stream "FPStoreSingle => ";
            printMemAddress address
        )
    |   FPArithR{ opc, source } => (stream(fpOpRepr opc); stream " "; printFPReg source)
    |   FPArithConst{ opc, source, precision } =>
            (stream(fpOpRepr opc); case precision of DoublePrecision => stream "D " | SinglePrecision => stream "S "; stream(Address.stringOfWord source))
    |   FPArithMemory{ opc, base, offset, precision } =>
            (stream(fpOpRepr opc); case precision of DoublePrecision => stream "D " | SinglePrecision => stream "S "; printBaseOffset(base, NoIndex, offset))
    |   FPUnary opc => stream(fpUnaryRepr opc)
    |   FPStatusToEAX => (stream "FPStatus "; printGReg eax)
    |   FPLoadInt { base, offset, opSize} =>
            (stream "FPLoadInt"; stream(printOpSize opSize); stream " "; printBaseOffset(base, NoIndex, offset))
    |   FPFree reg => (stream "FPFree "; printFPReg reg)
    |   MultiplyR {source, output, opSize } =>
            (stream "MultiplyR"; stream(printOpSize opSize); stream " "; printRegOrMemoryArg printGReg source; stream " *=>"; printGReg output)
    |   XMMArith { opc, source, output } =>
        (
            stream (sse2OpRepr opc ^ "RM "); printXMMReg output; stream " <= "; printRegOrMemoryArg printXMMReg source
        )
    |   XMMStoreToMemory { toStore, address, precision=DoublePrecision } =>
        (
            stream "MoveDouble "; printXMMReg toStore; stream " => "; printMemAddress address
        )
    |   XMMStoreToMemory { toStore, address, precision=SinglePrecision } =>
        (
            stream "MoveSingle "; printXMMReg toStore; stream " => "; printMemAddress address
        )
    |   XMMConvertFromInt { source, output, opSize, precision } =>
        (
            stream (case precision of DoublePrecision => "ConvertFromIntToDouble " | SinglePrecision => "ConvertFromIntToSingle ");
            stream(printOpSize opSize); stream " "; printGReg source; stream " => "; printXMMReg output
        )
    |   SignExtendForDivide opSize => ( stream "SignExtendForDivide"; stream(printOpSize opSize) )
    |   XChng { reg, arg, opSize } =>
            (stream "XChng"; stream(printOpSize opSize); stream " "; printGReg reg; stream " <=> "; printRegOrMemoryArg printGReg arg)
    |   Negative { output, opSize } =>
            (stream "Negative"; stream(printOpSize opSize); stream " "; printGReg output)
    |   JumpTable{cases, ...} =>
            (* One line per destination label. *)
            List.app(fn(Label{labelNo, ...}) => (stream "UncondBranch L"; stream(Int.toString labelNo); stream "\n")) cases
    |   IndexedJumpCalc { addrReg, indexReg, jumpSize=ref jumpSize } =>
        (
            stream "IndexedJumpCalc "; printGReg addrReg; stream " += "; printGReg indexReg;
            (* Fixed: removed stray trailing space after "* 8". *)
            stream (case jumpSize of JumpSize2 => " * 2" | JumpSize8 => " * 8")
        )
    |   MoveXMMRegToGenReg { source, output } =>
        (
            stream "MoveXMMRegToGenReg "; printXMMReg source; stream " => "; printGReg output
        )
    |   MoveGenRegToXMMReg { source, output } =>
        (
            stream "MoveGenRegToXMMReg "; printGReg source; stream " => "; printXMMReg output
        )
    |   XMMShiftRight { output, shift } =>
        (
            stream "XMMShiftRight "; printXMMReg output; stream " by "; stream(Word8.toString shift)
        )
    |   FPLoadCtrlWord address =>
        (
            stream "FPLoadCtrlWord "; stream " => "; printMemAddress address
        )
    |   FPStoreCtrlWord address =>
        (
            stream "FPStoreCtrlWord "; stream " <= "; printMemAddress address
        )
    |   XMMLoadCSR address =>
        (
            stream "XMMLoadCSR "; stream " => "; printMemAddress address
        )
    |   XMMStoreCSR address =>
        (
            stream "XMMStoreCSR "; stream " <= "; printMemAddress address
        )
    |   FPStoreInt address =>
        (
            stream "FPStoreInt "; stream " <= "; printMemAddress address
        )
    |   XMMStoreInt{ source, output, precision, isTruncate } =>
        (
            stream "XMMStoreInt";
            case precision of SinglePrecision => stream "Single" | DoublePrecision => stream "Double";
            if isTruncate then stream "Truncate " else stream " ";
            printGReg output; stream " <= "; printRegOrMemoryArg printXMMReg source
        )
    |   CondMove { test, output, source, opSize } =>
        (
            (* Fixed: separate the size suffix from the output register. *)
            stream "CondMove"; stream(branchOpRepr test); stream(printOpSize opSize); stream " ";
            printGReg output; stream " <= "; printRegOrMemoryArg printGReg source
        )
    |   LoadAbsolute { destination, value } =>
            ( stream "LoadAbsolute "; printGReg destination; stream " <= "; stream(Address.stringOfWord value) )
    |   PauseForSpinLock => stream "PauseForSpinLock"
    ;
    stream "\n"
end
(* How a code segment is to be realised: ImplementGeneral for ordinary
   generated code; ImplementLiteral carries a machine-word constant
   (presumably a segment standing for a literal value - confirm with the
   callers, which are outside this chunk). *)
datatype implement = ImplementGeneral | ImplementLiteral of machineWord
(* If assembly-code listing is enabled for this code segment, write the
   procedure name followed by each operation on its own line and a final
   blank line; otherwise do nothing. *)
fun printLowLevelCode(ops, Code{printAssemblyCode, printStream, procName, ...}) =
    if not printAssemblyCode
    then ()
    else
    let
        (* A procedure without a name is listed as "?". *)
        val name = if procName = "" then "?" else procName
    in
        printStream name;
        printStream ":\n";
        List.app (fn operation => printOperation(operation, printStream)) ops;
        printStream "\n"
    end
-
-(* val opLen = if isX64 then OpSize64 else OpSize32 *)
+
+ val isLocalCodeAddress: machineWord -> bool = RunCall.rtsCallFast1 "PolyX86IsLocalCode"
(* Code generate a list of operations. The list is in reverse order i.e. last instruction first. *)
fun codeGenerate ops =
let
fun cgOp(Move{source=RegisterArg source, destination=RegisterArg output, moveSize=Move64 }) =
(* Move from one general register to another. N.B. Because we're using the
"store" version of the Move the source and output are reversed. *)
opReg(MOVL_R_A64, source, output)
| cgOp(Move{source=RegisterArg source, destination=RegisterArg output, moveSize=Move32 }) =
opReg(MOVL_R_A32, source, output)
| cgOp(Move{ source=NonAddressConstArg source, destination=RegisterArg output, moveSize=Move64}) =
if targetArch <> Native32Bit
then
(
(* N.B. There is related code in getConstant that deals with PC-relative values and
also checks the range of constants that need to be in the constant area. *)
if source >= 0 andalso source < 0x100000000
then (* Unsigned 32 bits. We can use a 32-bit instruction to set the
value because it will zero extend to 64-bits.
This may also allow us to save a rex byte. *)
let
val (rc, rx) = getReg output
val opb = opCodeBytes(MOVL_32_R rc, if rx then SOME{w=false, r=false, b=rx, x=false} else NONE)
in
opb @ word32Unsigned(LargeWord.fromLargeInt source)
end
else if source >= ~0x80000000 andalso source < 0
then (* Signed 32-bits. *)
(* This is not scanned in 64-bit mode because 32-bit values aren't
big enough to contain addresses. *)
opRegPlus2(MOVL_32_A64, output, 0w0) @ int32Signed source
else (* Too big for 32-bits; put it in the non-word area. *)
opConstantOperand(MOVL_A_R64, output)
)
else (* 32-bit mode. *)
(
(* The RTS scans for possible addresses in MOV instructions so we
can only use MOV if this is a tagged value. If it isn't we have
to use something else such as XOR/ADD. In particular this is used
before LOCK XADD for atomic inc/dec.
We expect Move to preserve the CC so shouldn't use anything that
affects it. There was a previous comment that said that using
LEA wasn't a good idea. Perhaps because it takes 6 bytes. *)
if source mod 2 = 0
then opIndexed(LEAL32, source, NONE, NoIndex, output)
else
let
val (rc, rx) = getReg output
val opb = opCodeBytes(MOVL_32_R rc, if rx then SOME{w=false, r=false, b=rx, x=false} else NONE)
in
opb @ int32Signed source
end
)
| cgOp(Move{ source=NonAddressConstArg source, destination=RegisterArg output, moveSize=Move32}) =
if targetArch <> Native32Bit
then
(
(* N.B. There is related code in getConstant that deals with PC-relative values and
also checks the range of constants that need to be in the constant area. *)
if source >= 0 andalso source < 0x100000000
then (* Unsigned 32 bits. We can use a 32-bit instruction to set the
value because it will zero extend to 64-bits.
This may also allow us to save a rex byte. *)
let
val (rc, rx) = getReg output
val opb = opCodeBytes(MOVL_32_R rc, if rx then SOME{w=false, r=false, b=rx, x=false} else NONE)
in
opb @ word32Unsigned(LargeWord.fromLargeInt source)
end
else if source >= ~0x80000000 andalso source < 0
then (* Signed 32-bits. *)
(* This is not scanned in 64-bit mode because 32-bit values aren't
big enough to contain addresses. *)
opRegPlus2(MOVL_32_A64, output, 0w0) @ int32Signed source
else (* Too big for 32-bits; put it in the non-word area. *)
opConstantOperand(MOVL_A_R64, output)
)
else (* 32-bit mode. *)
(
(* The RTS scans for possible addresses in MOV instructions so we
can only use MOV if this is a tagged value. If it isn't we have
to use something else such as XOR/ADD. In particular this is used
before LOCK XADD for atomic inc/dec.
We expect Move to preserve the CC so shouldn't use anything that
affects it. There was a previous comment that said that using
LEA wasn't a good idea. Perhaps because it takes 6 bytes. *)
if source mod 2 = 0
then opIndexed(LEAL32, source, NONE, NoIndex, output)
else
let
val (rc, rx) = getReg output
val opb = opCodeBytes(MOVL_32_R rc, if rx then SOME{w=false, r=false, b=rx, x=false} else NONE)
in
opb @ int32Signed source
end
)
| cgOp(Move{ source=AddressConstArg _, destination=RegisterArg output, moveSize=Move64 }) =
(
(* The constant area is currently PolyWords. That means we MUST use
a 32-bit load in 32-in-64. *)
targetArch = Native64Bit orelse raise InternalError "Move64 in 32-bit";
(* Put address constants in the constant area. *)
opConstantOperand(MOVL_A_R64, output)
)
| cgOp(Move{ source=AddressConstArg _, destination=RegisterArg output, moveSize=Move32 }) =
(
case targetArch of
Native64Bit => raise InternalError "Move32 - AddressConstArg"
| ObjectId32Bit =>
(* Put address constants in the constant area. *)
(* The constant area is currently PolyWords. That means we MUST use
a 32-bit load in 32-in-64. *)
opConstantOperand(MOVL_A_R32, output)
| Native32Bit =>
(* Immediate constant *)
let
val (rc, _) = getReg output
in
opCodeBytes(MOVL_32_R rc, NONE) @ int32Signed(tag 0)
end
)
| cgOp(LoadAbsolute{ destination, ... }) =
(
(* Immediate address constant. This is currently only used the special case of loading
the address of PolyX86GetThreadData in a callback when we don't have rbx in 32-in-64. *)
case targetArch of
Native32Bit =>
let
val (rc, _) = getReg destination
in
opCodeBytes(MOVL_32_R rc, NONE) @ int32Signed(tag 0)
end
| Native64Bit => opConstantOperand(MOVL_A_R64, destination)
| ObjectId32Bit =>
let
val (rc, rx) = getReg destination
in
opCodeBytes(MOVL_64_R rc, SOME{w=true, r=false, b=rx, x=false}) @ largeWordToBytes(LargeWord.fromLargeInt(tag 0), 8)
end
)
| cgOp(Move{source=MemoryArg{base, offset, index}, destination=RegisterArg output, moveSize=Move32 }) =
opAddress(MOVL_A_R32, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{source=MemoryArg{base, offset, index}, destination=RegisterArg output, moveSize=Move64 }) =
opAddress(MOVL_A_R64, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{source=MemoryArg{base, offset, index}, destination=RegisterArg output, moveSize=Move8 }) =
(* We don't need a REX.W bit here because the top 32-bits of a
64-bit register will always be zeroed. *)
opAddress(MOVZB, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{source=RegisterArg source, destination=RegisterArg output, moveSize=Move8 }) =
let
(* Zero extend an 8-bit value in a register to 32/64 bits. *)
val (rrC, rrX) = getReg output
val (rbC, rbX) = getReg source
(* We don't need a REX.W bit here because the top 32-bits of a
64-bit register will always be zeroed but we may need a REX byte
if we're using esi or edi. *)
val rexByte =
if rrC < 0w4 andalso not rrX andalso not rbX
then NONE
else if hostIsX64
then SOME {w=false, r=rrX, b=rbX, x=false}
else raise InternalError "Move8 with esi/edi"
in
opCodeBytes(MOVZB, rexByte) @ [modrm(Register, rrC, rbC)]
end
| cgOp(Move{source=MemoryArg{base, offset, index}, destination=RegisterArg output, moveSize=Move8X32 }) =
opAddress(MOVSXB32, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{source=MemoryArg{base, offset, index}, destination=RegisterArg output, moveSize=Move8X64 }) =
(* But we will need a Rex.W here. *)
opAddress(MOVSXB64, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{moveSize=Move16, source=MemoryArg{base, offset, index}, destination=RegisterArg output }) =
(* No need for Rex.W *)
opAddress(MOVZW, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{moveSize=Move16X32, source=MemoryArg{base, offset, index}, destination=RegisterArg output }) =
opAddress(MOVSXW32, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{moveSize=Move16X64, source=MemoryArg{base, offset, index}, destination=RegisterArg output }) =
(* But we do need Rex.W here *)
opAddress(MOVSXW64, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{moveSize=Move32X64, source=RegisterArg source, destination=RegisterArg output }) =
(* We should have a REX.W bit here. *)
opReg(MOVSXD, output, source)
| cgOp(Move{moveSize=Move32X64, source=MemoryArg{base, offset, index}, destination=RegisterArg output }) =
(* We should have a REX.W bit here. *)
opAddress(MOVSXD, LargeInt.fromInt offset, base, index, output)
| cgOp(Move{moveSize=Move32X64, ...}) = raise InternalError "cgOp: LoadNonWord Size32Bit"
| cgOp(LoadAddress{ offset, base, index, output, opSize }) =
(* This provides a mixture of addition and multiplication in a single
instruction. *)
opIndexed(case opSize of OpSize64 => LEAL64 | OpSize32 => LEAL32, LargeInt.fromInt offset, base, index, output)
| cgOp(ArithToGenReg{ opc, output, source=RegisterArg source, opSize }) =
arithOpReg (opc, output, source, opSize=OpSize64)
| cgOp(ArithToGenReg{ opc, output, source=NonAddressConstArg source, opSize }) =
let
(* On the X86/32 we use CMP with literal sources to compare with an
address and the RTS searches for them in the code. Any
non-address constant must be tagged. Most will be but we
might want to use this to compare with the contents of a
LargeWord value. *)
val _ =
if hostIsX64 orelse is8BitL source orelse opc <> CMP orelse IntInf.andb(source, 1) = 1
then ()
else raise InternalError "CMP with constant that looks like an address"
in
immediateOperand(opc, output, source, opSize)
end
| cgOp(ArithToGenReg{ opc, output, source=AddressConstArg _, opSize }) =
(* This is only used for opc=CMP to compare addresses for equality. *)
if hostIsX64
then (* We use this in 32-in-64 as well as native 64-bit. *)
opConstantOperand(
(case opSize of OpSize64 => Arith64 | OpSize32 => Arith32) (opc, 0w3), output)
else
let
val (rc, _) = getReg output
val opb = opCodeBytes(Group1_32_A32 (* group1, 32 bit immediate *), NONE)
val mdrm = modrm(Register, arithOpToWord opc, rc)
in
opb @ [mdrm] @ int32Signed(tag 0)
end
| cgOp(ArithToGenReg{ opc, output, source=MemoryArg{offset, base, index}, opSize }) =
opAddress((case opSize of OpSize64 => Arith64 | OpSize32 => Arith32) (opc, 0w3),
LargeInt.fromInt offset, base, index, output)
| cgOp(ArithByteMemConst{ opc, address={offset, base, index}, source }) =
opIndexedPlus2(Group1_8_a (* group1, 8 bit immediate *),
LargeInt.fromInt offset, base, index, arithOpToWord opc) @ [source]
| cgOp(ArithMemConst{ opc, address={offset, base, index}, source, opSize }) =
if is8BitL source
then (* Can use one byte immediate *)
opIndexedPlus2(case opSize of OpSize64 => Group1_8_A64 | OpSize32 => Group1_8_A32 (* group1, 8 bit immediate *),
LargeInt.fromInt offset, base, index, arithOpToWord opc) @ [Word8.fromLargeInt source]
else (* Need 32 bit immediate. *)
opIndexedPlus2(case opSize of OpSize64 => Group1_32_A64 | OpSize32 => Group1_32_A32(* group1, 32 bit immediate *),
LargeInt.fromInt offset, base, index, arithOpToWord opc) @ int32Signed source
| cgOp(ArithMemLongConst{ opc, address={offset, base, index}, ... }) =
(* Currently this is always a comparison. It is only valid in 32-bit mode because
the constant is only 32-bits. *)
if hostIsX64
then raise InternalError "ArithMemLongConst in 64-bit mode"
else
let
val opb = opIndexedPlus2 (Group1_32_A32, LargeInt.fromInt offset, base, index, arithOpToWord opc)
in
opb @ int32Signed(tag 0)
end
| cgOp(ShiftConstant { shiftType, output, shift, opSize }) =
if shift = 0w1
then opRegPlus2(case opSize of OpSize64 => Group2_1_A64 | OpSize32 => Group2_1_A32, output, shiftTypeToWord shiftType)
else opRegPlus2(case opSize of OpSize64 => Group2_8_A64 | OpSize32 => Group2_8_A32, output, shiftTypeToWord shiftType) @ [shift]
| cgOp(ShiftVariable { shiftType, output, opSize }) =
opRegPlus2(case opSize of OpSize64 => Group2_CL_A64 | OpSize32 => Group2_CL_A32, output, shiftTypeToWord shiftType)
| cgOp(TestByteBits{arg=RegisterArg reg, bits}) =
let
(* Test the bottom bit and jump depending on its value. This is used
for tag tests in arbitrary precision operations and also for testing
for short/long values. *)
val (regNum, rx) = getReg reg
in
if reg = eax
then (* Special instruction for testing accumulator. Can use an 8-bit test. *)
opCodeBytes(TEST_ACC8, NONE) @ [bits]
else if hostIsX64
then
let
(* We can use a REX code to force it to always use the low order byte. *)
val opb = opCodeBytes(Group3_a,
if rx orelse regNum >= 0w4 then SOME{w=false, r=false, b=rx, x=false} else NONE)
val mdrm = modrm (Register, 0w0 (* test *), regNum)
in
opb @ [mdrm, bits]
end
else if reg = ebx orelse reg = ecx orelse reg = edx (* can we use an 8-bit test? *)
then (* Yes. The register value refers to low-order byte. *)
let
val opb = opCodeBytes(Group3_a, NONE)
val mdrm = modrm(Register, 0w0 (* test *), regNum)
in
opb @ [mdrm, bits]
end
else
let
val opb = opCodeBytes(Group3_A32, NONE)
val mdrm = modrm (Register, 0w0 (* test *), regNum)
in
opb @ mdrm :: word32Unsigned(Word8.toLarge bits)
end
end
| cgOp(TestByteBits{arg=MemoryArg{base, offset, index}, bits}) =
(* Test the tag bit and set the condition code. *)
opIndexedPlus2(Group3_a, LargeInt.fromInt offset, base, index, 0w0 (* test *)) @ [ bits]
| cgOp(TestByteBits _) = raise InternalError "cgOp: TestByteBits"
| cgOp(ConditionalBranch{ test=opc, ... }) = opCodeBytes(CondJump32 opc, NONE) @ word32Unsigned 0w0
| cgOp(SetCondition{ output, test}) =
let
val (rrC, rx) = getReg output
(* In 64-bit mode we can specify the low-order byte of RSI/RDI but we
must use a REX prefix. This isn't possible in 32-bit mode. *)
in
if hostIsX64 orelse rrC < 0w4
then
let
val opb = opCodeBytes(SetCC test,
if rx orelse rrC >= 0w4 then SOME{w=false, r=false, b=rx, x=false} else NONE)
val mdrm = modrm (Register, 0w0, rrC)
in
opb @ [mdrm]
end
else raise InternalError "High byte register"
end
| cgOp(CallRTS{rtsEntry, saveRegs}) = rtsCall(rtsEntry, saveRegs)
| cgOp(RepeatOperation repOp) =
let
(* We don't explicitly clear the direction flag. Should that be done? *)
val opb = opCodeBytes(REP, NONE)
(* Put in a rex prefix to force 64-bit mode. *)
val optRex =
if case repOp of STOS64 => true | MOVS64 => true | _ => false
then [rex{w=true, r=false, b=false, x=false}]
else []
val repOp = repOpsToWord repOp
in
opb @ optRex @ [repOp]
end
| cgOp(DivideAccR{arg, isSigned, opSize}) =
opRegPlus2(case opSize of OpSize64 => Group3_A64 | OpSize32 => Group3_A32, arg, if isSigned then 0w7 else 0w6)
| cgOp(DivideAccM{base, offset, isSigned, opSize}) =
opPlus2(case opSize of OpSize64 => Group3_A64 | OpSize32 => Group3_A32, LargeInt.fromInt offset, base, if isSigned then 0w7 else 0w6)
| cgOp(AtomicXAdd{address={offset, base, index}, output, opSize}) =
(* Locked exchange-and-add. We need the lock prefix before the REX prefix. *)
opAddress(case opSize of OpSize64 => LOCK_XADD64 | OpSize32 => LOCK_XADD32, LargeInt.fromInt offset, base, index, output)
| cgOp(AtomicCmpXChng{address={offset, base, index}, source, opSize}) =
(* Locked compare and exchange. *)
opAddress(case opSize of OpSize64 => LOCK_CMPXCHG64 | OpSize32 => LOCK_CMPXCHG32, LargeInt.fromInt offset,
base, index, source)
| cgOp(PushToStack(RegisterArg reg)) =
let
val (rc, rx) = getReg reg
in
(* Always 64-bit but a REX prefix may be needed for the register. *)
opCodeBytes(PUSH_R rc, if rx then SOME{w=false, b = true, x=false, r = false } else NONE)
end
| cgOp(PushToStack(MemoryArg{base, offset, index})) =
opAddressPlus2(Group5, LargeInt.fromInt offset, base, index, 0w6 (* push *))
| cgOp(PushToStack(NonAddressConstArg constnt)) =
if is8BitL constnt
then opCodeBytes(PUSH_8, NONE) @ [Word8.fromLargeInt constnt]
else if is32bit constnt
then opCodeBytes(PUSH_32, NONE) @ int32Signed constnt
else (* It won't fit in the immediate; put it in the non-address area. *)
let
val opb = opCodeBytes(Group5, NONE)
val mdrm = modrm(Based0, 0w6 (* push *), 0w5 (* PC rel *))
in
opb @ [mdrm] @ int32Signed(tag 0)
end
| cgOp(PushToStack(AddressConstArg _)) =
(
case targetArch of
Native64Bit => (* Put it in the constant area. *)
let
val opb = opCodeBytes(Group5, NONE)
val mdrm = modrm(Based0, 0w6 (* push *), 0w5 (* PC rel *));
in
opb @ [mdrm] @ int32Signed(tag 0)
end
| Native32Bit => opCodeBytes(PUSH_32, NONE) @ int32Signed(tag 0)
| ObjectId32Bit =>
(* We can't do this. The constant area contains 32-bit quantities
and 32-bit literals are sign-extended rather than zero-extended. *)
raise InternalError "PushToStack:AddressConstArg"
)
| cgOp(PopR reg ) =
let
val (rc, rx) = getReg reg
in
(* Always 64-bit but a REX prefix may be needed for the register.
Because the register is encoded in the instruction the rex bit for
the register is b not r. *)
opCodeBytes(POP_R rc, if rx then SOME{w=false, b = true, x=false, r = false } else NONE)
end
| cgOp(Move{source=RegisterArg toStore, destination=MemoryArg{offset, base, index}, moveSize=Move64}) =
opAddress(MOVL_R_A64, LargeInt.fromInt offset, base, index, toStore)
| cgOp(Move{source=RegisterArg toStore, destination=MemoryArg{offset, base, index}, moveSize=Move32}) =
opAddress(MOVL_R_A32, LargeInt.fromInt offset, base, index, toStore)
| cgOp(Move{source=NonAddressConstArg toStore, destination=MemoryArg{offset, base, index}, moveSize=Move64 }) =
(
(* Short constant. In 32-bit mode this is scanned as a possible address. That means
we can't have an untagged constant in it. That's not a problem in 64-bit mode.
There's a special check for using this to set the length word on newly allocated
memory. *)
targetArch <> Native32Bit orelse toStore = 0 orelse toStore mod 2 = 1 orelse offset = ~ (Word.toInt wordSize)
orelse raise InternalError "cgOp: StoreConstToMemory not tagged";
opAddressPlus2(MOVL_32_A64, LargeInt.fromInt offset, base, index, 0w0) @ int32Signed toStore
)
| cgOp(Move{source=NonAddressConstArg toStore, destination=MemoryArg{offset, base, index}, moveSize=Move32 }) =
(
(* Short constant. In 32-bit mode this is scanned as a possible address. That means
we can't have an untagged constant in it. That's not a problem in 64-bit mode.
There's a special check for using this to set the length word on newly allocated
memory. *)
targetArch <> Native32Bit orelse toStore = 0 orelse toStore mod 2 = 1 orelse offset = ~ (Word.toInt wordSize)
orelse raise InternalError "cgOp: StoreConstToMemory not tagged";
opAddressPlus2(MOVL_32_A32, LargeInt.fromInt offset, base, index, 0w0) @ int32Signed toStore
)
| cgOp(Move{source=AddressConstArg _, destination=MemoryArg{offset, base, index}, moveSize=Move32}) =
(* This is not used for addresses even in 32-in-64. We don't scan for addresses after MOVL_32_A. *)
if targetArch <> Native32Bit
then raise InternalError "StoreLongConstToMemory in 64-bit mode"
else opAddressPlus2(MOVL_32_A32, LargeInt.fromInt offset, base, index, 0w0) @ int32Signed (tag 0)
| cgOp(Move{source=AddressConstArg _, destination=MemoryArg _, ...}) =
raise InternalError "cgOp: Move - AddressConstArg => MemoryArg"
| cgOp(Move{ moveSize = Move8, source=RegisterArg toStore, destination=MemoryArg{offset, base, index} }) =
let
val (rrC, _) = getReg toStore
(* In 64-bit mode we can specify the low-order byte of RSI/RDI but we
must use a REX prefix. This isn't possible in 32-bit mode. *)
val opcode =
if hostIsX64 then MOVB_R_A64{forceRex= rrC >= 0w4}
else if rrC < 0w4 then MOVB_R_A32
else raise InternalError "High byte register"
in
opAddress(opcode, LargeInt.fromInt offset, base, index, toStore)
end
| cgOp(Move{ moveSize = Move16, source=RegisterArg toStore, destination=MemoryArg{offset, base, index}}) =
opAddress(MOVL_R_A16, LargeInt.fromInt offset, base, index, toStore)
| cgOp(Move{ moveSize = Move8, source=NonAddressConstArg toStore, destination=MemoryArg{offset, base, index}}) =
opAddressPlus2(MOVB_8_A, LargeInt.fromInt offset, base, index, 0w0) @
[Word8.fromLargeInt toStore]
| cgOp(Move _) = raise InternalError "Move: Unimplemented arguments"
(* Allocation is dealt with by expanding the code. *)
| cgOp(AllocStore _) = raise InternalError "cgOp: AllocStore"
| cgOp(AllocStoreVariable _) = raise InternalError "cgOp: AllocStoreVariable"
| cgOp StoreInitialised = raise InternalError "cgOp: StoreInitialised"
| cgOp(CallAddress(NonAddressConstArg _)) = (* Call to the start of the code. Offset is patched in later. *)
opCodeBytes (CALL_32, NONE) @ int32Signed 0
- | cgOp(CallAddress(AddressConstArg _)) =
- if targetArch = Native64Bit
+ | cgOp(CallAddress(AddressConstArg address)) =
+ if targetArch = Native64Bit andalso not (isLocalCodeAddress address)
then
let
val opc = opCodeBytes(Group5, NONE)
val mdrm = modrm(Based0, 0w2 (* call *), 0w5 (* PC rel *))
in
opc @ [mdrm] @ int32Signed(tag 0)
end
(* Because this is a relative branch we need to point this at itself.
Until it is set to the relative offset of the destination it
needs to contain an address within the code and this could
be the last instruction. *)
else opCodeBytes (CALL_32, NONE) @ int32Signed ~5
| cgOp(CallAddress(RegisterArg reg)) = opRegPlus2(Group5, reg, 0w2 (* call *))
| cgOp(CallAddress(MemoryArg{base, offset, index})) =
opAddressPlus2(Group5, LargeInt.fromInt offset, base, index, 0w2 (* call *))
| cgOp(JumpAddress(NonAddressConstArg _)) =
(* Jump to the start of the current function. Offset is patched in later. *)
opCodeBytes (JMP_32, NONE) @ int32Signed 0
- | cgOp(JumpAddress (AddressConstArg _)) =
- if targetArch = Native64Bit
+ | cgOp(JumpAddress (AddressConstArg address)) =
+ if targetArch = Native64Bit andalso not (isLocalCodeAddress address)
then
let
val opb = opCodeBytes (Group5, NONE)
val mdrm = modrm(Based0, 0w4 (* jmp *), 0w5 (* PC rel *))
in
opb @ [mdrm] @ int32Signed(tag 0)
end
else opCodeBytes (JMP_32, NONE) @ int32Signed ~5 (* As with Call. *)
| cgOp(JumpAddress (RegisterArg reg)) =
(* Used as part of indexed case - not for entering a function. *)
opRegPlus2(Group5, reg, 0w4 (* jmp *))
| cgOp(JumpAddress(MemoryArg{base, offset, index})) =
opAddressPlus2(Group5, LargeInt.fromInt offset, base, index, 0w4 (* jmp *))
| cgOp(ReturnFromFunction args) =
if args = 0
then opCodeBytes(RET, NONE)
else
let
val offset = Word.fromInt args * nativeWordSize
in
opCodeBytes(RET_16, NONE) @ [wordToWord8 offset, wordToWord8(offset >> 0w8)]
end
| cgOp (RaiseException { workReg }) =
opEA(if hostIsX64 then MOVL_A_R64 else MOVL_A_R32, LargeInt.fromInt memRegHandlerRegister, ebp, workReg) @
opAddressPlus2(Group5, 0, workReg, NoIndex, 0w4 (* jmp *))
| cgOp(UncondBranch _) = opToInt JMP_32 :: word32Unsigned 0w0
| cgOp(ResetStack{numWords, preserveCC}) =
let
val bytes = Word.toLargeInt(Word.fromInt numWords * nativeWordSize)
in
(* If we don't need to preserve the CC across the reset we use ADD since
it's shorter. *)
if preserveCC
then opEA(if hostIsX64 then LEAL64 else LEAL32, bytes, esp, esp)
else immediateOperand(ADD, esp, bytes, if hostIsX64 then OpSize64 else OpSize32)
end
| cgOp(JumpLabel _) = [] (* No code. *)
| cgOp(LoadLabelAddress{ output, ... }) =
(* Load the address of a label. Used when setting up an exception handler or
in indexed cases. *)
(* On X86/64 we can use pc-relative addressing to set the start of the handler.
On X86/32 we have to load the address of the start of the code and add an offset. *)
if hostIsX64
then opConstantOperand(LEAL64, output)
else
let
val (rc, _) = getReg output
in
opCodeBytes(MOVL_32_R rc , NONE) @ int32Signed(tag 0) @
opRegPlus2(Group1_32_A32, output, arithOpToWord ADD) @ int32Signed 0
end
| cgOp (FPLoadFromMemory {address={ base, offset, index }, precision}) =
let
val loadInstr =
case precision of
DoublePrecision => FPESC 0w5
| SinglePrecision => FPESC 0w1
in
opAddressPlus2(loadInstr, LargeInt.fromInt offset, base, index, 0wx0)
end
| cgOp (FPLoadFromFPReg{source=FloatingPtReg fp, ...}) =
(* Assume there's nothing currently on the stack. *)
floatingPtOp({escape=0w1, md=0w3, nnn=0w0, rm= fp + 0w0}) (* FLD ST(r1) *)
| cgOp (FPLoadFromConst {precision, ...} ) =
(* The real constant here is actually the address of a memory
object. FLD takes the address as the argument and in 32-bit mode
we use an absolute address. In 64-bit mode we need to put the
constant at the end of the code segment and use PC-relative
addressing which happens to be encoded in the same way.
There are special cases for zero and one but it's probably too
much work to detect them. *)
let
val esc = case precision of SinglePrecision => 0w1 | DoublePrecision => 0w5
val opb = opCodeBytes(FPESC esc, NONE) (* FLD [Constant] *)
val mdrm = modrm (Based0, 0w0, 0w5 (* constant address/PC-relative *))
in
opb @ [mdrm] @ int32Signed(tag 0)
end
| cgOp (FPStoreToFPReg{ output=FloatingPtReg dest, andPop }) =
(* Assume there's one item on the stack. *)
floatingPtOp({escape=0w5, md=0w3, nnn=if andPop then 0wx3 else 0wx2,
rm = dest+0w1(* One item *)}) (* FSTP ST(n+1) *)
| cgOp (FPStoreToMemory{address={ base, offset, index}, precision, andPop }) =
let
val storeInstr =
case precision of
DoublePrecision => FPESC 0w5
| SinglePrecision => FPESC 0w1
val subInstr = if andPop then 0wx3 else 0wx2
in
opAddressPlus2(storeInstr, LargeInt.fromInt offset, base, index, subInstr)
end
| cgOp (FPArithR{ opc, source = FloatingPtReg src}) =
floatingPtOp({escape=0w0, md=0w3, nnn=fpOpToWord opc,
rm=src + 0w1 (* One item already there *)})
| cgOp (FPArithConst{ opc, precision, ... }) =
(* See comment on FPLoadFromConst *)
let
val fpesc = case precision of DoublePrecision => 0w4 | SinglePrecision => 0w0
val opb = opCodeBytes(FPESC fpesc, NONE) (* FADD etc [constnt] *)
val mdrm = modrm (Based0, fpOpToWord opc, 0w5 (* constant address *))
in
opb @ [mdrm] @ int32Signed(tag 0)
end
| cgOp (FPArithMemory{ opc, base, offset, precision }) =
let
val fpesc = case precision of DoublePrecision => 0w4 | SinglePrecision => 0w0
in
opPlus2(FPESC fpesc, LargeInt.fromInt offset, base, fpOpToWord opc) (* FADD/FMUL etc [r2] *)
end
| cgOp (FPUnary opc ) =
let
val {rm, nnn} = fpUnaryToWords opc
in
floatingPtOp({escape=0w1, md=0w3, nnn=nnn, rm=rm}) (* FCHS etc *)
end
| cgOp (FPStatusToEAX ) =
opCodeBytes(FPESC 0w7, NONE) @ [0wxe0] (* FNSTSW AX *)
| cgOp (FPFree(FloatingPtReg reg)) =
floatingPtOp({escape=0w5, md=0w3, nnn=0w0, rm=reg}) (* FFREE FP(n) *)
| cgOp (FPLoadInt{base, offset, opSize=OpSize64}) =
(* fildl (esp) in 32-bit mode or fildq (esp) in 64-bit mode. *)
opPlus2(FPESC 0w7, LargeInt.fromInt offset, base, 0w5)
| cgOp (FPLoadInt{base, offset, opSize=OpSize32}) =
(* fildl (esp) in 32-bit mode or fildq (esp) in 64-bit mode. *)
opPlus2(FPESC 0w3, LargeInt.fromInt offset, base, 0w0)
| cgOp (MultiplyR {source=RegisterArg srcReg, output, opSize}) =
(* We use the 0F AF form of IMUL rather than the Group3 MUL or IMUL
because the former allows us to specify the destination register.
The Group3 forms produce double length results in RAX:RDX/EAX:EDX
but we only ever want the low-order half. *)
opReg(case opSize of OpSize64 => IMUL64 | OpSize32 => IMUL32 (* 2 byte opcode *), output, srcReg)
| cgOp (MultiplyR {source=MemoryArg{base, offset, index}, output, opSize}) =
(* This may be used for large-word multiplication. *)
opAddress(case opSize of OpSize64 => IMUL64 | OpSize32 => IMUL32 (* 2 byte opcode *), LargeInt.fromInt offset, base, index, output)
| cgOp(MultiplyR {source=NonAddressConstArg constnt, output, opSize}) =
(* If the constant is an 8-bit or 32-bit value we are actually using a
three-operand instruction where the argument can be a register or memory
and the destination register does not need to be the same as the source. *)
if is8BitL constnt
then opReg(case opSize of OpSize64 => IMUL_C8_64 | OpSize32 => IMUL_C8_32, output, output) @ [Word8.fromLargeInt constnt]
else if is32bit constnt
then opReg(case opSize of OpSize64 => IMUL_C32_64 | OpSize32 => IMUL_C32_32, output, output) @ int32Signed constnt
else opConstantOperand(case opSize of OpSize64 => IMUL64 | OpSize32 => IMUL32, output)
| cgOp(MultiplyR {source=AddressConstArg _, ...}) =
raise InternalError "Multiply - address constant"
| cgOp (XMMArith { opc, source=MemoryArg{base, offset, index}, output }) =
mMXAddress(SSE2Ops opc, LargeInt.fromInt offset, base, index, output)
| cgOp (XMMArith { opc, source=AddressConstArg _, output=SSE2Reg rrC }) =
let
(* The real constant here is actually the address of an 8-byte memory
object. In 32-bit mode we put this address into the code and retain
this memory object. In 64-bit mode we copy the real value out of the
memory object into the non-address constant area and use
PC-relative addressing. These happen to be encoded the same
way. *)
val opb = opCodeBytes(SSE2Ops opc, NONE)
val mdrm = modrm (Based0, rrC, 0w5 (* constant address/PC-relative *))
in
opb @ [mdrm] @ int32Signed(tag 0)
end
| cgOp (XMMArith { opc, source=RegisterArg(SSE2Reg rrS), output=SSE2Reg rrC }) =
let
val oper = SSE2Ops opc
val pref = opcodePrefix oper
val esc = escapePrefix oper
val opc = opToInt oper
val mdrm = modrm(Register, rrC, rrS)
in
pref @ esc @ [opc, mdrm]
end
| cgOp (XMMArith { opc, source=NonAddressConstArg _, output=SSE2Reg rrC }) =
let
val _ = hostIsX64 orelse raise InternalError "XMMArith-NonAddressConstArg in 32-bit mode"
(* This is currently used for 32-bit float arguments but can equally be
used for 64-bit values since the actual argument will always be put
in the 64-bit constant area. *)
val opb = opCodeBytes(SSE2Ops opc, NONE)
val mdrm = modrm (Based0, rrC, 0w5 (* constant address/PC-relative *))
in
opb @ [mdrm] @ int32Signed(tag 0)
end
| cgOp (XMMStoreToMemory { toStore, address={base, offset, index}, precision }) =
let
val oper =
case precision of
DoublePrecision => SSE2StoreDouble
| SinglePrecision => SSE2StoreSingle
in
mMXAddress(oper, LargeInt.fromInt offset, base, index, toStore)
end
| cgOp (XMMConvertFromInt { source, output=SSE2Reg rrC, opSize, precision }) =
let
(* The source is a general register and the output a XMM register. *)
(* TODO: The source can be a memory location. *)
val (rbC, rbX) = getReg source
val oper =
case (opSize, precision) of
(OpSize64, DoublePrecision) => CVTSI2SD64
| (OpSize32, DoublePrecision) => CVTSI2SD32
| (OpSize64, SinglePrecision) => CVTSI2SS64
| (OpSize32, SinglePrecision) => CVTSI2SS32
in
(* This is a special case with both an XMM and general register. *)
opcodePrefix oper @ rexByte(oper, false, rbX, false) @
escapePrefix oper @ [opToInt oper, modrm(Register, rrC, rbC)]
end
| cgOp (SignExtendForDivide OpSize64) =
opCodeBytes(CQO_CDQ64, SOME {w=true, r=false, b=false, x=false})
| cgOp (SignExtendForDivide OpSize32) =
opCodeBytes(CQO_CDQ32, NONE)
| cgOp (XChng { reg, arg=RegisterArg regY, opSize }) =
opReg(case opSize of OpSize64 => XCHNG64 | OpSize32 => XCHNG32, reg, regY)
| cgOp (XChng { reg, arg=MemoryArg{offset, base, index}, opSize }) =
opAddress(case opSize of OpSize64 => XCHNG64 | OpSize32 => XCHNG32, LargeInt.fromInt offset, base, index, reg)
| cgOp (XChng _) = raise InternalError "cgOp: XChng"
| cgOp (Negative {output, opSize}) =
opRegPlus2(case opSize of OpSize64 => Group3_A64 | OpSize32 => Group3_A32, output, 0w3 (* neg *))
| cgOp (JumpTable{cases, jumpSize=ref jumpSize}) =
let
val _ = jumpSize = JumpSize8 orelse raise InternalError "cgOp: JumpTable"
(* Make one jump for each case and pad it 8 bytes with Nops. *)
fun makeJump (_, l) = opToInt JMP_32 :: word32Unsigned 0w0 @ [opToInt NOP, opToInt NOP, opToInt NOP] @ l
in
List.foldl makeJump [] cases
end
| cgOp(IndexedJumpCalc{ addrReg, indexReg, jumpSize=ref jumpSize }) =
(
jumpSize = JumpSize8 orelse raise InternalError "cgOp: IndexedJumpCalc";
(* Should currently be JumpSize8 which requires a multiplier of 4 and
4 to be subtracted to remove the shifted tag. *)
opAddress(if hostIsX64 then LEAL64 else LEAL32, ~4, addrReg, Index4 indexReg, addrReg)
)
| cgOp(MoveXMMRegToGenReg { source=SSE2Reg rrC, output }) =
let
(* The source is a XMM register and the output a general register. *)
val (rbC, rbX) = getReg output
val oper = MOVDFromXMM
in
(* This is a special case with both an XMM and general register. *)
opcodePrefix oper @ rexByte(oper, false, rbX, false) @
escapePrefix oper @ [opToInt oper, modrm(Register, rrC, rbC)]
end
| cgOp(MoveGenRegToXMMReg { source, output=SSE2Reg rrC }) =
let
(* The source is a general register and the output a XMM register. *)
val (rbC, rbX) = getReg source
val oper = MOVQToXMM
in
(* This is a special case with both an XMM and general register. *)
(* This needs to move the whole 64-bit value. TODO: This is inconsistent
with MoveXMMRegToGenReg *)
opcodePrefix oper @ rexByte(oper, false, rbX, false) @
escapePrefix oper @ [opToInt oper, modrm(Register, rrC, rbC)]
end
| cgOp(XMMShiftRight { output=SSE2Reg rrC, shift }) =
let
val oper = PSRLDQ
in
opcodePrefix oper @ escapePrefix oper @ [opToInt oper, modrm(Register, 0w3, rrC), shift]
end
| cgOp(FPLoadCtrlWord {base, offset, index}) =
opIndexedPlus2(FPESC 0w1, LargeInt.fromInt offset, base, index, 0w5)
| cgOp(FPStoreCtrlWord {base, offset, index}) =
opIndexedPlus2(FPESC 0w1, LargeInt.fromInt offset, base, index, 0w7)
| cgOp(XMMLoadCSR {base, offset, index}) =
opIndexedPlus2(LDSTMXCSR, LargeInt.fromInt offset, base, index, 0w2)
| cgOp(XMMStoreCSR {base, offset, index}) =
opIndexedPlus2(LDSTMXCSR, LargeInt.fromInt offset, base, index, 0w3)
| cgOp(FPStoreInt {base, offset, index}) =
(* fistp dword ptr [esp] in 32-bit mode or fistp qword ptr [rsp] in 64-bit mode. *)
if hostIsX64
then opIndexedPlus2(FPESC 0w7, LargeInt.fromInt offset, base, index, 0w7)
else opIndexedPlus2(FPESC 0w3, LargeInt.fromInt offset, base, index, 0w3)
| cgOp(XMMStoreInt {source, output, precision, isTruncate}) =
let
(* The destination is a general register. The source is an XMM register or memory. *)
val (rbC, rbX) = getReg output
val oper =
case (hostIsX64, precision, isTruncate) of
(false, DoublePrecision, false) => CVTSD2SI32
| (true, DoublePrecision, false) => CVTSD2SI64
| (false, SinglePrecision, false) => CVTSS2SI32
| (true, SinglePrecision, false) => CVTSS2SI64
| (false, DoublePrecision, true) => CVTTSD2SI32
| (true, DoublePrecision, true) => CVTTSD2SI64
| (false, SinglePrecision, true) => CVTTSS2SI32
| (true, SinglePrecision, true) => CVTTSS2SI64
in
case source of
MemoryArg{base, offset, index} =>
opAddress(oper, LargeInt.fromInt offset, base, index, output)
| RegisterArg(SSE2Reg rrS) =>
opcodePrefix oper @ rexByte(oper, rbX, false, false) @
escapePrefix oper @ [opToInt oper, modrm(Register, rbC, rrS)]
| _ => raise InternalError "XMMStoreInt: Not register or memory"
end
| cgOp(CondMove { test, output, source=RegisterArg source, opSize=OpSize32 }) =
opReg(CMOV32 test, output, source)
| cgOp(CondMove { test, output, source=RegisterArg source, opSize=OpSize64 }) =
opReg(CMOV64 test, output, source)
| cgOp(CondMove { test, output, source=NonAddressConstArg _, opSize }) =
(
(* We currently support only native-64 bit and put the constant in the
non-address constant area. These are 64-bit values both in native
64-bit and in 32-in-64. To support it in 32-bit mode we'd have to
put the constant in a single-word object and put its absolute
address into the code. *)
targetArch <> Native32Bit orelse
raise InternalError "CondMove: constant in 32-bit mode";
opConstantOperand((case opSize of OpSize32 => CMOV32 | OpSize64 => CMOV64) test, output)
)
| cgOp(CondMove { test, output, source=AddressConstArg _, opSize=OpSize64 }) =
(* An address constant. The opSize must match the size of a polyWord since
the value it going into the constant area. *)
(
targetArch = Native64Bit orelse raise InternalError "CondMove: AddressConstArg";
opConstantOperand(CMOV64 test, output)
)
| cgOp(CondMove { test, output, source=AddressConstArg _, opSize=OpSize32 }) =
(
(* We only support address constants in 32-in-64. *)
targetArch = ObjectId32Bit orelse raise InternalError "CondMove: AddressConstArg";
opConstantOperand(CMOV32 test, output)
)
| cgOp(CondMove { test, output, source=MemoryArg{base, offset, index}, opSize=OpSize32 }) =
opAddress(CMOV32 test, LargeInt.fromInt offset, base, index, output)
| cgOp(CondMove { test, output, source=MemoryArg{base, offset, index}, opSize=OpSize64 }) =
opAddress(CMOV64 test, LargeInt.fromInt offset, base, index, output)
| cgOp PauseForSpinLock = opCodeBytes(PAUSE, NONE)
in
List.rev(List.foldl (fn (c, list) => Word8Vector.fromList(cgOp c) :: list) [] ops)
end
(* General function to process the code.  Walks the instruction list [ops]
   and the list of generated byte vectors [byteList] in step, applying
   [foldFn] to each (instruction, bytes, ic, accumulator) quadruple, where
   [ic] is the byte offset of the instruction within the original code.
   The accumulator starts as [n]; the final accumulator is returned.
   Lists of unequal length stop at the shorter one. *)
fun foldCode foldFn n (ops, byteList) =
let
    fun step(instr :: restInstrs, vec :: restVecs, offset, state) =
        let
            (* Fold this instruction in, then advance the byte counter by
               the length of the code generated for it. *)
            val state' = foldFn(instr, vec, offset, state)
            val offset' = offset + Word.fromInt(Word8Vector.length vec)
        in
            step(restInstrs, restVecs, offset', state')
        end
    |   step(_, _, _, state) = state
in
    step(ops, byteList, 0w0, n)
end
(* Go through the code and update branch and similar instructions with the destinations
   of the branches. Long branches are converted to short where possible and the code
   is reprocessed. That might repeat if the effect of shorting one branch allows
   another to be shortened. *)
fun fixupLabels(ops, bytesList, labelCount) =
let
    (* Label array - initialise to 0wxff... . Every label should be defined
       but just in case, this is more likely to be detected in int32Signed. *)
    val labelArray = Array.array(labelCount, ~ 0w1)
    (* First pass - Set the addresses of labels.  Walks the instructions and
       the corresponding byte vectors in step, recording the current byte
       offset [ic] for every JumpLabel encountered. *)
    fun setLabelAddresses(oper :: operList, bytes :: byteList, ic) =
    (
        case oper of
            JumpLabel(Label{labelNo, ...}) => Array.update(labelArray, labelNo, ic)
        |   _ => ();
        setLabelAddresses(operList, byteList, ic + Word.fromInt(Word8Vector.length bytes))
    )
    |   setLabelAddresses(_, _, ic) = ic (* Return the length of the code. *)
    (* Rewrite the final four bytes of [bytes] with the 32-bit displacement
       to [destination]; the leading bytes (opcode and any modrm) are kept. *)
    fun fixup32(destination, bytes, ic) =
    let
        val brLength = Word8Vector.length bytes
        (* The offset is relative to the end of the branch instruction. *)
        val diff = Word.toInt destination - Word.toInt ic - brLength
    in
        Word8VectorSlice.concat[
            Word8VectorSlice.slice(bytes, 0, SOME(brLength-4)), (* The original opcode. *)
            Word8VectorSlice.full(Word8Vector.fromList(int32Signed(LargeInt.fromInt diff)))
        ]
    end
    (* Second pass, run by foldCode: rebuild the byte vector for each
       instruction that refers to a label, shortening branches when the
       displacement fits in a signed byte.  The result list is accumulated
       in reverse. *)
    fun fixupAddress(UncondBranch(Label{labelNo, ...}), bytes, ic, list) =
        let
            val destination = Array.sub(labelArray, labelNo)
            val brLength = Word8Vector.length bytes
            (* The offset is relative to the end of the branch instruction. *)
            val diff = Word.toInt destination - Word.toInt ic - brLength
        in
            if brLength = 2
            then (* It's a short branch. Take the original operand and set the relative offset. *)
                Word8Vector.fromList [opToInt JMP_8, byteSigned diff] :: list
            else if brLength <> 5
            then raise InternalError "fixupAddress"
            else (* 32-bit offset. If it will fit in a byte we can use a short branch.
                    If this is a reverse branch we can actually use values up to -131
                    here because we've calculated using the end of the long branch. *)
            if diff <= 127 andalso diff >= ~(128 + 3)
            then Word8Vector.fromList [opToInt JMP_8, 0w0 (* Fixed on next pass *)] :: list
            else Word8Vector.fromList(opToInt JMP_32 :: int32Signed(LargeInt.fromInt diff)) :: list
        end
    |   fixupAddress(ConditionalBranch{label=Label{labelNo, ...}, test, ...}, bytes, ic, list) =
        let
            val destination = Array.sub(labelArray, labelNo)
            val brLength = Word8Vector.length bytes
            (* The offset is relative to the end of the branch instruction. *)
            val diff = Word.toInt destination - Word.toInt ic - brLength
        in
            if brLength = 2
            then (* It's a short branch. Take the original operand and set the relative offset. *)
                Word8Vector.fromList [opToInt(CondJump test), byteSigned diff] :: list
            else if brLength <> 6
            then raise InternalError "fixupAddress"
            (* Long conditional branches are 6 bytes (two-byte opcode) so the
               backward slack is 128+4 rather than 128+3. *)
            else if diff <= 127 andalso diff >= ~(128+4)
            then Word8Vector.fromList[opToInt(CondJump test), 0w0 (* Fixed on next pass *)] :: list
            else Word8Vector.fromList(opCodeBytes(CondJump32 test, NONE) @ int32Signed(LargeInt.fromInt diff)) :: list
        end
    |   fixupAddress(LoadLabelAddress{ label=Label{labelNo, ...}, ... }, brCode, ic, list) =
        let
            val destination = Array.sub(labelArray, labelNo)
        in
            if hostIsX64
            then (* This is a relative offset on the X86/64. *)
                fixup32(destination, brCode, ic) :: list
            else (* On X86/32 the address is relative to the start of the code so we simply put in
                    the destination address. *)
                Word8VectorSlice.concat[
                    Word8VectorSlice.slice(brCode, 0, SOME(Word8Vector.length brCode-4)),
                    Word8VectorSlice.full(Word8Vector.fromList(int32Signed(Word.toLargeInt destination)))] :: list
        end
    |   fixupAddress(JumpTable{cases, jumpSize as ref JumpSize8}, brCode: Word8Vector.vector, ic, list) =
        let
            (* Each branch is a 32-bit jump padded up to 8 bytes. *)
            fun processCase(Label{labelNo, ...} :: cases, offset, ic) =
                    fixup32(Array.sub(labelArray, labelNo),
                        Word8VectorSlice.vector(Word8VectorSlice.slice(brCode, offset, SOME 5)), ic) ::
                    Word8VectorSlice.vector(Word8VectorSlice.slice(brCode, offset+5, SOME 3)) ::
                        processCase(cases, offset+8, ic+0w8)
            |   processCase _ = []
            (* Could we use short branches? If all of the branches were short the
               table would be smaller so the offsets we use would be less.
               Ignore backwards branches - could only occur if we have linked labels
               in a loop. *)
            (* NOTE(review): 6 = 8-byte entry minus 2-byte short jump; this
               appears to be a conservative bound on the shrunk table's reach
               given labels still hold un-shrunk addresses - confirm. *)
            val newStartOfCode = ic + Word.fromInt(List.length cases * 6)
            fun tryShort(Label{labelNo, ...} :: cases, ic) =
                let
                    val destination = Array.sub(labelArray, labelNo)
                in
                    if destination > ic + 0w2 andalso destination - ic - 0w2 < 0w127
                    then tryShort(cases, ic+0w2)
                    else false
                end
            |   tryShort _ = true
            val newCases =
                if tryShort(cases, newStartOfCode)
                then
                (
                    jumpSize := JumpSize2;
                    (* Generate a short branch table. *)
                    List.map(fn _ => Word8Vector.fromList [opToInt JMP_8, 0w0 (* Fixed on next pass *)]) cases
                )
                else processCase(cases, 0, ic)
        in
            Word8Vector.concat newCases :: list
        end
    |   fixupAddress(JumpTable{cases, jumpSize=ref JumpSize2}, _, ic, list) =
        let
            (* Each branch is a short jump. *)
            fun processCase(Label{labelNo, ...} :: cases, offset, ic) =
                let
                    val destination = Array.sub(labelArray, labelNo)
                    val brLength = 2
                    val diff = Word.toInt destination - Word.toInt ic - brLength
                in
                    Word8Vector.fromList[opToInt JMP_8, byteSigned diff] :: processCase(cases, offset+2, ic+0w2)
                end
            |   processCase _ = []
        in
            Word8Vector.concat(processCase(cases, 0, ic)) :: list
        end
        (* If we've shortened a jump table we have to change the indexing. *)
    |   fixupAddress(IndexedJumpCalc{ addrReg, indexReg, jumpSize=ref JumpSize2 }, _, _, list) =
        (* On x86/32 it might be shorter to use DEC addrReg; ADD addrReg, indexReg. *)
        Word8Vector.fromList(opAddress(if hostIsX64 then LEAL64 else LEAL32, ~1, addrReg, Index1 indexReg, addrReg)) :: list
    |   fixupAddress(CallAddress(NonAddressConstArg _), brCode, ic, list) =
        let
            val brLen = Word8Vector.length brCode
        in
            (* Call to the start of the code. Offset is -(bytes to start). *)
            Word8VectorSlice.concat[
                Word8VectorSlice.slice(brCode, 0, SOME(brLen-4)), (* The original opcode. *)
                Word8VectorSlice.full(Word8Vector.fromList(int32Signed(LargeInt.fromInt(~(Word.toInt ic+brLen)))))
            ] :: list
        end
    |   fixupAddress(JumpAddress(NonAddressConstArg _), brCode, ic, list) =
        let
            val brLen = Word8Vector.length brCode
        in
            (* Jump to the start of the code. Offset is -(bytes to start). *)
            Word8VectorSlice.concat[
                Word8VectorSlice.slice(brCode, 0, SOME(brLen-4)), (* The original opcode. *)
                Word8VectorSlice.full(Word8Vector.fromList(int32Signed(LargeInt.fromInt(~(Word.toInt ic+brLen)))))
            ] :: list
        end
    |   fixupAddress(_, bytes, _, list) = bytes :: list
    (* Fixed-point iteration: rewrite the branches, recompute the label
       addresses over the (possibly shorter) code and repeat until the total
       size stops changing.  Shortening can only reduce the size, so an
       increase indicates a bug. *)
    fun reprocess(bytesList, lastCodeSize) =
    let
        val fixedList = List.rev(foldCode fixupAddress [] (ops, bytesList))
        val newCodeSize = setLabelAddresses(ops, fixedList, 0w0)
    in
        if newCodeSize = lastCodeSize
        then (fixedList, lastCodeSize)
        else if newCodeSize > lastCodeSize
        then raise InternalError "reprocess - size increased"
        else reprocess(fixedList, newCodeSize)
    end
in
    reprocess(bytesList, setLabelAddresses(ops, bytesList, 0w0))
end
(* The handling of constants generally differs between 32- and 64-bits. In 32-bits we put all constants
inline and the GC processes the code to find the addresses. For real values the "constant" is actually
the address of the boxed real value.
In 64-bit mode inline constants were used with the MOV instruction but this has now been removed.
All constants are stored in one of two areas at the end of the
code segment. Non-addresses, including the actual values of reals, are stored in the non-address area
and addresses go in the address area. Only the latter is scanned by the GC.
The address area is also used in 32-bit mode but only has the address of the function name and the
address of the profile ref in it. *)
(* Constants written inline into the code stream rather than into the
   constant areas at the end of the code segment.  Used on 32-bit targets
   where the GC scans the code itself for addresses. *)
datatype inline32constants =
    SelfAddress (* The address of the start of the code - inline absolute address 32-bit only *)
|   InlineAbsoluteAddress of machineWord (* An address in the code: 32-bit only *)
|   InlineRelativeAddress of machineWord (* A relative address: 32-bit only. *)
(* Defect fixed: the CallAddress and JumpAddress clauses below contained
   unresolved diff residue (duplicated old/new lines with -/+ markers) which
   made the block syntactically invalid; the newer (+) versions are kept,
   matching the corresponding cgOp clauses which test isLocalCodeAddress. *)
local
    (* Turn an integer constant into the 64-bit value that goes into the
       non-address constant area. *)
    fun intConst ival = LargeWord.fromLargeInt ival
    (* Copy a real constant from memory into a 64-bit value. *)
    fun realConst c =
    let
        val cAsAddr = toAddress c
        (* This may be a boxed real or, in 32-in-64 mode, a boxed float. *)
        val cLength = length cAsAddr * wordSize
        val _ = ((cLength = 0w8 orelse cLength = 0w4) andalso flags cAsAddr = F_bytes) orelse
                    raise InternalError "realConst: Not a real number"
        (* Accumulate the bytes, most-significant first, into a LargeWord. *)
        fun getBytes(i, a) =
            if i = 0w0 then a
            else getBytes(i-0w1, a*0w256 + Word8.toLargeWord(loadByte(cAsAddr, i-0w1)))
    in
        getBytes(cLength, 0w0)
    end
    (* Collect the constants used by one instruction.  The accumulator is a
       triple (inline constants, address-area constants, non-address-area
       constants); each entry records the byte offset within the code at
       which the constant (or the 32-bit reference to it) must be written. *)
    fun getConstant(Move{ source=NonAddressConstArg source, moveSize=Move32, ...}, bytes, ic, (inl, addr, na)) =
            if targetArch <> Native32Bit
            then
            (
                if source >= ~0x80000000 andalso source < 0x100000000
                then (* Signed or unsigned 32-bits. *) (inl, addr, na)
                else (* Too big for 32-bits. *)
                    (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, intConst source) :: na)
            )
            else (inl, addr, na) (* 32-bit mode. The constant will always be inline even if we've had to use LEA r,c *)
    |   getConstant(Move{ source=NonAddressConstArg source, moveSize=Move64, ...}, bytes, ic, (inl, addr, na)) =
            if targetArch <> Native32Bit
            then
            (
                if source >= ~0x80000000 andalso source < 0x100000000
                then (* Signed or unsigned 32-bits. *) (inl, addr, na)
                else (* Too big for 32-bits. *)
                    (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, intConst source) :: na)
            )
            else (inl, addr, na) (* 32-bit mode. The constant will always be inline even if we've had to use XOR r,r; ADD r,c *)
    |   getConstant(Move{ source=AddressConstArg source, ... }, bytes, ic, (inl, addr, na)) =
            if targetArch <> Native32Bit
            then (* Address constants go in the constant area. *)
                (inl, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, source) :: addr, na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - wordSize, InlineAbsoluteAddress source) :: inl, addr, na)
    |   getConstant(LoadAbsolute{value, ...}, bytes, ic, (inl, addr, na)) =
            if targetArch = Native64Bit
            then (* Address constants go in the constant area. *)
                (inl, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, value) :: addr, na)
            (* This is the only case of an inline constant in 32-in-64 *)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - nativeWordSize, InlineAbsoluteAddress value) :: inl, addr, na)
    |   getConstant(ArithToGenReg{ source=NonAddressConstArg source, ... }, bytes, ic, (inl, addr, na)) =
            if is32bit source
            then (inl, addr, na)
            else (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, intConst source) :: na)
    |   getConstant(ArithToGenReg{ source=AddressConstArg source, ... }, bytes, ic, (inl, addr, na)) =
            if hostIsX64
            then (inl, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, source) :: addr, na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineAbsoluteAddress source) :: inl, addr, na)
    |   getConstant(ArithMemLongConst{ source, ... }, bytes, ic, (inl, addr, na)) = (* 32-bit only. *)
            ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineAbsoluteAddress source) :: inl, addr, na)
    |   getConstant(PushToStack(NonAddressConstArg constnt), bytes, ic, (inl, addr, na)) =
            if is32bit constnt then (inl, addr, na)
            else (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, intConst constnt) :: na)
    |   getConstant(PushToStack(AddressConstArg constnt), bytes, ic, (inl, addr, na)) =
            if targetArch = Native64Bit
            then (inl, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, constnt) :: addr, na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineAbsoluteAddress constnt) :: inl, addr, na)
        (* A non-local call target in native 64-bit mode goes through the
           address constant area; local targets and 32-bit modes use an
           inline relative address (matching cgOp's encoding choice). *)
    |   getConstant(CallAddress(AddressConstArg w), bytes, ic, (inl, addr, na)) =
            if targetArch = Native64Bit andalso not (isLocalCodeAddress w)
            then (inl, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, w) :: addr, na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineRelativeAddress w) :: inl, addr, na)
    |   getConstant(JumpAddress(AddressConstArg w), bytes, ic, (inl, addr, na)) =
            if targetArch = Native64Bit andalso not (isLocalCodeAddress w)
            then (inl, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, w) :: addr, na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineRelativeAddress w) :: inl, addr, na)
    |   getConstant(LoadLabelAddress _, _, ic, (inl, addr, na)) =
            (* We need the address of the code itself but it's in the first of a pair of instructions. *)
            if hostIsX64 then (inl, addr, na) else ((ic + 0w1, SelfAddress) :: inl, addr, na)
    |   getConstant(FPLoadFromConst{constant, ...}, bytes, ic, (inl, addr, na)) =
            if hostIsX64
            then (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, realConst constant) :: na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineAbsoluteAddress constant) :: inl, addr, na)
    |   getConstant(FPArithConst{ source, ... }, bytes, ic, (inl, addr, na)) =
            if hostIsX64
            then (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, realConst source) :: na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineAbsoluteAddress source) :: inl, addr, na)
    |   getConstant(XMMArith { source=AddressConstArg constVal, ... }, bytes, ic, (inl, addr, na)) =
            (* Real.real constant or, with 32-bit words, a Real32.real constant. *)
            if hostIsX64
            then (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, realConst constVal) :: na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - 0w4, InlineAbsoluteAddress constVal) :: inl, addr, na)
    |   getConstant(XMMArith { source=NonAddressConstArg constVal, ... }, bytes, ic, (inl, addr, na)) =
            (* Real32.real constant in native 64-bit. *)
            (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, intConst constVal) :: na)
    |   getConstant(MultiplyR{ source=NonAddressConstArg source, ... }, bytes, ic, (inl, addr, na)) =
            if is32bit source
            then (inl, addr, na)
            else (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, intConst source) :: na)
    |   getConstant(CondMove{ source=NonAddressConstArg source, ... }, bytes, ic, (inl, addr, na)) =
            if targetArch <> Native32Bit
            then (inl, addr, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, intConst source) :: na)
            else (inl, addr, na) (* 32-bit mode. The constant will always be inline. *)
    |   getConstant(CondMove{ source=AddressConstArg source, ... }, bytes, ic, (inl, addr, na)) =
            if targetArch <> Native32Bit
            then (* Address constants go in the constant area. *)
                (inl, (ic + Word.fromInt(Word8Vector.length bytes) - 0w4, source) :: addr, na)
            else ((ic + Word.fromInt(Word8Vector.length bytes) - wordSize, InlineAbsoluteAddress source) :: inl, addr, na)
    |   getConstant(_, _, _, l) = l
in
    (* Extract every constant from the code together with its patch offset. *)
    val getConstants = foldCode getConstant ([], [], [])
end
(* It is convenient to have AllocStore and AllocStoreVariable as primitives at the higher
level but at this point it's better to expand them into their basic instructions. *)
fun expandComplexOperations(instrs, oldLabelCount) =
let
val labelCount = ref oldLabelCount
fun mkLabel() = Label{labelNo= !labelCount} before labelCount := !labelCount + 1
(* On X86/64 the local pointer is in r15. On X86/32 it's in memRegs. *)
val localPointer =
if hostIsX64 then RegisterArg r15 else MemoryArg{base=ebp, offset=memRegLocalMPointer, index=NoIndex}
val nativeWordOpSize = if hostIsX64 then OpSize64 else OpSize32
(* Common tail for both fixed-size and variable-size heap allocation.
   On entry resultReg holds the proposed new heap pointer (current
   pointer minus the allocation size).  Emits: a comparison against the
   heap limit, conditional branches around an RTS overflow call, and
   finally the store of the new heap pointer.  regSaveSet lists the
   registers the RTS call must preserve. *)
fun allocStoreCommonCode (resultReg, isVarAlloc, regSaveSet: genReg list) =
let
    val compare =
        ArithToGenReg{opc=CMP, output=resultReg,
            source=MemoryArg{base=ebp, offset=memRegLocalMbottom, index=NoIndex}, opSize=nativeWordOpSize}
    (* Normally we won't have run out of store so we want the default
       branch prediction to skip the test here. However doing that
       involves adding an extra branch which lengthens the code so
       it's probably not worth while. *)
    (* Just checking against the lower limit can fail
       in the situation where the heap pointer is at the low end of
       the address range and the store required is so large that the
       subtraction results in a negative number. In that case it
       will be > (unsigned) lower_limit so in addition we have
       to check that the result is < (unsigned) heap_pointer.
       This actually happened on Windows with X86-64.
       In theory this can happen with fixed-size allocations as
       well as variable allocations but in practice fixed-size
       allocations are going to be small enough that it's not a
       problem. *)
    val destLabel = mkLabel()
    val branches =
        if isVarAlloc
        then
        let
            val extraLabel = mkLabel()
        in
            (* Variable allocation: two unsigned checks (see comment above). *)
            [ConditionalBranch{test=JB, label=extraLabel},
             ArithToGenReg{opc=CMP, output=resultReg, source=localPointer, opSize=nativeWordOpSize},
             ConditionalBranch{test=JB, label=destLabel},
             JumpLabel extraLabel]
        end
        else [ConditionalBranch{test=JNB, label=destLabel}]
    val callRts = CallRTS{rtsEntry=HeapOverflowCall, saveRegs=regSaveSet}
    val fixup = JumpLabel destLabel
    (* Update the heap pointer now we have the store. This is also
       used by the RTS in the event of a trap to work out how much
       store was being allocated. *)
    val update =
        if hostIsX64 then Move{source=RegisterArg resultReg, destination=RegisterArg r15, moveSize=Move64}
        else Move{source=RegisterArg resultReg,
                destination=MemoryArg{base=ebp, offset=memRegLocalMPointer, index=NoIndex}, moveSize=Move32}
in
    compare :: branches @ [callRts, fixup, update]
end
(* Expand AllocStore/AllocStoreVariable pseudo-operations into concrete
   instruction sequences.  Walks the instruction list accumulating the
   (reversed) result in "code".  The boolean tracks whether we are
   between an allocation and its StoreInitialised marker: starting a
   new allocation there is an internal error because the previous cell
   has not yet been fully initialised. *)
fun doExpansion([], code, _) = code
|   doExpansion(AllocStore {size, output, saveRegs} :: instrs, code, inAllocation) =
    let
        val _ = inAllocation andalso raise InternalError "doExpansion: Allocation started but not complete"
        val () = if List.exists (fn r => r = output) saveRegs then raise InternalError "AllocStore: in set" else ()
        (* Fixed-size allocation: subtract the byte size (including the
           header word) from the heap pointer into the output register. *)
        val startCode =
            case targetArch of
                Native64Bit =>
                let
                    val bytes = (size + 1) * Word.toInt wordSize
                in
                    [LoadAddress{output=output, offset = ~ bytes, base=SOME r15, index=NoIndex, opSize=OpSize64}]
                    (* TODO: What if it's too big to fit? *)
                end
            |   Native32Bit =>
                let
                    val bytes = (size + 1) * Word.toInt wordSize
                in
                    [Move{source=MemoryArg{base=ebp, offset=memRegLocalMPointer, index=NoIndex},
                        destination=RegisterArg output, moveSize=Move32},
                     LoadAddress{output=output, offset = ~ bytes, base=SOME output, index=NoIndex, opSize=OpSize32}]
                end
            |   ObjectId32Bit =>
                let
                    (* We must allocate an even number of words. *)
                    val heapWords = if Int.rem(size, 2) = 1 then size+1 else size+2
                    val bytes = heapWords * Word.toInt wordSize
                in
                    [LoadAddress{output=output, offset = ~ bytes, base=SOME r15, index=NoIndex, opSize=OpSize64}]
                end
        val resultCode = startCode @ allocStoreCommonCode(output, false, saveRegs)
    in
        doExpansion(instrs, (List.rev resultCode) @ code, true)
    end
|   doExpansion(AllocStoreVariable {size, output, saveRegs} :: instrs, code, inAllocation) =
    let
        (* Allocates memory. The "size" register contains the number of words as a tagged int. *)
        val _ = inAllocation andalso raise InternalError "doExpansion: Allocation started but not complete"
        val () = if List.exists (fn r => r = output) saveRegs then raise InternalError "AllocStore: in set" else ()
        (* Negate the length and add it to the current heap pointer. *)
        (* Compute the number of bytes into dReg. The length in sReg is the number
           of words as a tagged value so we need to multiply it, add wordSize to
           include one word for the header then subtract the, multiplied, tag.
           We use LEA here but want to avoid having an empty base register. *)
        val _ = size = output andalso raise InternalError "AllocStoreVariable : same register for size and output"
        val startCode =
            if wordSize = 0w8 (* 8-byte words *)
            then
            [
                ArithToGenReg{opc=XOR, output=output, source=RegisterArg output, opSize=OpSize32 (* Rest is zeroed *)},
                ArithToGenReg{opc=SUB, output=output, source=RegisterArg size, opSize=OpSize64},
                LoadAddress{output=output, base=SOME r15, offset= ~(Word.toInt wordSize-4), index=Index4 output, opSize=OpSize64 }
            ]
            else (* 4 byte words *)
            [
                LoadAddress{output=output, base=SOME size, offset=Word.toInt wordSize-2,
                            index=Index1 size, opSize=nativeWordOpSize },
                Negative{output=output, opSize=nativeWordOpSize},
                ArithToGenReg{opc=ADD, output=output, source=localPointer, opSize=nativeWordOpSize}
            ]
        (* If this is 32-in-64 we need to round down to the next 8-byte boundary. *)
        val roundCode =
            if targetArch = ObjectId32Bit
            then [ArithToGenReg{opc=AND, output=output, source=NonAddressConstArg ~8, opSize=OpSize64 }]
            else []
        val resultCode = startCode @ roundCode @ allocStoreCommonCode(output, true, saveRegs)
    in
        doExpansion(instrs, (List.rev resultCode) @ code, true)
    end
|   doExpansion(StoreInitialised :: instrs, code, _) = doExpansion(instrs, code, false)
|   doExpansion(instr :: instrs, code, inAlloc) = doExpansion(instrs, instr::code, inAlloc)
val expanded = List.rev(doExpansion(instrs, [], false))
in
    (expanded, !labelCount)
end
(* Disassemble and print the machine code in "seg" for debugging.
   procName is printed as a header; output goes to the code object's
   print stream.  Decoding stops at the HLT marker byte. *)
fun printCode (Code{procName, printStream, ...}, seg) =
let
    val print = printStream
    (* Current byte offset within the code segment. *)
    val ptr = ref 0w0;
    (* prints a string representation of a number *)
    fun printValue v =
        if v < 0 then (print "-"; print(LargeInt.toString (~ v))) else print(LargeInt.toString v)
    (* In-place increment of a word ref. *)
    infix 3 +:= ;
    fun (x +:= y) = (x := !x + (y:word));
(* Get a signed 16-bit little-endian value from offset a. *)
fun get16s (a, seg) : int =
let
    val b0 = Word8.toInt (codeVecGet (seg, a));
    val b1 = Word8.toInt (codeVecGet (seg, a + 0w1));
    val b1' = if b1 >= 0x80 then b1 - 0x100 else b1;
in
    (b1' * 0x100) + b0
end
(* Get an unsigned 16-bit little-endian value from offset a. *)
fun get16u(a, seg) : int =
    Word8.toInt (codeVecGet (seg, a + 0w1)) * 0x100 + Word8.toInt (codeVecGet (seg, a))
(* Get 1 unsigned byte from the given offset in the segment. *)
fun get8u (a, seg) : Word8.word = codeVecGet (seg, a);
(* Get 1 signed byte from the given offset in the segment. *)
fun get8s (a, seg) : int = Word8.toIntX (codeVecGet (seg, a));
(* Get 1 signed 32 bit word from the given offset in the segment. *)
fun get32s (a, seg) : LargeInt.int =
let
    val b0 = Word8.toLargeInt (codeVecGet (seg, a));
    val b1 = Word8.toLargeInt (codeVecGet (seg, a + 0w1));
    val b2 = Word8.toLargeInt (codeVecGet (seg, a + 0w2));
    val b3 = Word8.toLargeInt (codeVecGet (seg, a + 0w3));
    (* Sign-extend from the top byte. *)
    val b3' = if b3 >= 0x80 then b3 - 0x100 else b3;
    val topHw = (b3' * 0x100) + b2;
    val bottomHw = (b1 * 0x100) + b0;
in
    (topHw * exp2_16) + bottomHw
end
(* Get a signed 64-bit little-endian value from offset a. *)
fun get64s (a, seg) : LargeInt.int =
let
    val b0 = Word8.toLargeInt (codeVecGet (seg, a));
    val b1 = Word8.toLargeInt (codeVecGet (seg, a + 0w1));
    val b2 = Word8.toLargeInt (codeVecGet (seg, a + 0w2));
    val b3 = Word8.toLargeInt (codeVecGet (seg, a + 0w3));
    val b4 = Word8.toLargeInt (codeVecGet (seg, a + 0w4));
    val b5 = Word8.toLargeInt (codeVecGet (seg, a + 0w5));
    val b6 = Word8.toLargeInt (codeVecGet (seg, a + 0w6));
    val b7 = Word8.toLargeInt (codeVecGet (seg, a + 0w7));
    (* Sign-extend from the top byte. *)
    val b7' = if b7 >= 0x80 then b7 - 0x100 else b7;
in
    ((((((((b7' * 0x100 + b6) * 0x100 + b5) * 0x100 + b4) * 0x100 + b3)
        * 0x100 + b2) * 0x100) + b1) * 0x100) + b0
end
(* Print the immediate value at the current position and step past it. *)
fun print32 () = printValue (get32s (!ptr, seg)) before (ptr +:= 0w4)
and print64 () = printValue (get64s (!ptr, seg)) before (ptr +:= 0w8)
and print16 () = printValue (LargeInt.fromInt(get16s (!ptr, seg)) before (ptr +:= 0w2))
and print8 () = printValue (LargeInt.fromInt(get8s (!ptr, seg)) before (ptr +:= 0w1))
(* Print the target of a short jump: an 8-bit displacement relative to
   the byte that follows it. *)
fun printJmp () =
let
    val valu = get8s (!ptr, seg) before ptr +:= 0w1
in
    print (Word.fmt StringCvt.HEX (Word.fromInt valu + !ptr))
end
(* Print an effective address. The register field may designate a general register
   or an xmm register depending on the instruction.  Consumes the modrm
   byte (and any s-i-b byte and displacement) from the current position. *)
fun printEAGeneral printRegister (rex, sz) =
let
    val modrm = codeVecGet (seg, !ptr)
    val () = ptr +:= 0w1
    (* Decode the Rex prefix if present. *)
    val rexX = (rex andb8 0wx2) <> 0w0
    val rexB = (rex andb8 0wx1) <> 0w0
    (* Size prefix for memory operands. *)
    val prefix =
        case sz of
            SZByte => "byte ptr "
        |   SZWord => "word ptr "
        |   SZDWord => "dword ptr "
        |   SZQWord => "qword ptr "
in
    (* Dispatch on the mod and r/m fields. *)
    case (modrm >>- 0w6, modrm andb8 0w7, hostIsX64) of
        (0w3, rm, _) => printRegister(rm, rexB, sz) (* Register operand. *)
    |   (md, 0w4, _) =>
        let (* s-i-b present. *)
            val sib = codeVecGet (seg, !ptr)
            val () = ptr +:= 0w1
            val ss = sib >>- 0w6
            val index = (sib >>- 0w3) andb8 0w7
            val base = sib andb8 0w7
        in
            print prefix;
            (* Displacement, if any, comes after the s-i-b byte. *)
            case (md, base, hostIsX64) of
                (0w1, _, _) => print8 ()
            |   (0w2, _, _) => print32 ()
            |   (0w0, 0w5, _) => print32 () (* Absolute in 32-bit mode. PC-relative in 64-bit ?? *)
            |   _ => ();
            print "[";
            if md <> 0w0 orelse base <> 0w5
            then
            (
                print (genRegRepr (mkReg (base, rexB), sz32_64));
                if index = 0w4 then () else print ","
            )
            else ();
            if index = 0w4 andalso not rexX (* No index. *)
            then ()
            else print (genRegRepr (mkReg(index, rexX), sz32_64) ^
                    (if ss = 0w0 then "*1"
                     else if ss = 0w1 then "*2"
                     else if ss = 0w2 then "*4"
                     else "*8"));
            print "]"
        end
    |   (0w0, 0w5, false) => (* Absolute address.*) (print prefix; print32 ())
    |   (0w0, 0w5, _) => (* PC-relative in 64-bit *)
            (print prefix; print ".+"; print32 ())
    |   (md, rm, _) => (* register plus offset. *)
        (
            print prefix;
            if md = 0w1 then print8 ()
            else if md = 0w2 then print32 ()
            else ();
            print ("[" ^ genRegRepr (mkReg(rm, rexB), sz32_64) ^ "]")
        )
end
(* For most instructions we want to print a general register. *)
val printEA =
    printEAGeneral (fn (rm, rexB, sz) => print (genRegRepr (mkReg(rm, rexB), sz)))
(* SSE2 instructions use an xmm register in the reg/rm field instead. *)
and printEAxmm =
    printEAGeneral (fn (rm, _, _) => print (xmmRegRepr(SSE2Reg rm)))
(* Print the mnemonic for one of the eight "group 1" arithmetic
   operations, selected by the 3-bit opcode-extension field. *)
fun printArith opc =
let
    val mnemonic =
        case opc of
            0 => "add "
        |   1 => "or "
        |   2 => "adc "
        |   3 => "sbb "
        |   4 => "and "
        |   5 => "sub "
        |   6 => "xor "
        |   _ => "cmp "
in
    print mnemonic
end
(* Print a reg <- reg/mem arithmetic instruction (Gv,Ev form).  The
   operation is encoded in bits 3-5 of the opcode byte itself. *)
fun printGvEv (opByte, rex, rexR, sz) =
let
    (* Register is in next byte. *)
    val nb = codeVecGet (seg, !ptr)
    val reg = (nb >>- 0w3) andb8 0w7
in
    printArith(Word8.toInt((opByte div 0w8) mod 0w8));
    print "\t";
    print (genRegRepr (mkReg(reg, rexR), sz));
    print ",";
    printEA(rex, sz)
end
(* Print a move-constant-to-register (B8+r) instruction.  The register
   is encoded in the low three bits of the opcode byte. *)
fun printMovCToR (opByte, sz, rexB) =
(
    print "mov \t";
    print(genRegRepr (mkReg (opByte mod 0w8, rexB), sz));
    print ",";
    case sz of SZDWord => print32 () | SZQWord => print64 () | _ => print "???"
)
(* Print a shift instruction (group 2).  The shift count is 1, CL or an
   immediate byte depending on the opcode. *)
fun printShift (opByte, rex, sz) =
let
    (* Opcode is determined by next byte. *)
    val nb = Word8.toInt (codeVecGet (seg, !ptr))
    val opc = (nb div 8) mod 8
in
    print
        (case opc of
            4 => "shl "
        |   5 => "shr "
        |   7 => "sar "
        |   _ => "???"
        );
    print "\t";
    printEA(rex, sz);
    print ",";
    if opByte = opToInt Group2_1_A32 then print "1"
    else if opByte = opToInt Group2_CL_A32 then print "cl"
    else print8 ()
end
(* Print an x87 floating-point instruction.  opByte is one of the
   escape opcodes D8-DF; the operation is determined by the escape
   number together with the reg field (and sometimes the r/m field)
   of the following modrm byte. *)
fun printFloat (opByte, rex) =
let
    (* Opcode is in next byte. *)
    val opByte2 = codeVecGet (seg, !ptr)
    val nnn = (opByte2 >>- 0w3) andb8 0w7
    val escNo = opByte andb8 0wx7
in
    if (opByte2 andb8 0wxC0) = 0wxC0
    then (* mod = 11: register-to-register / no-operand forms. *)
    (
        case (escNo, nnn, opByte2 andb8 0wx7 (* modrm *)) of
            (0w1, 0w4, 0w0) => print "fchs"
        |   (0w1, 0w4, 0w1) => print "fabs"
        |   (0w1, 0w5, 0w6) => print "fldz"
        |   (0w1, 0w5, 0w1) => print "flf1"
        |   (0w7, 0w4, 0w0) => print "fnstsw\tax"
        |   (0w1, 0w5, 0w0) => print "fld1"
        |   (0w1, 0w6, 0w3) => print "fpatan"
        |   (0w1, 0w7, 0w2) => print "fsqrt"
        |   (0w1, 0w7, 0w6) => print "fsin"
        |   (0w1, 0w7, 0w7) => print "fcos"
        |   (0w1, 0w6, 0w7) => print "fincstp"
        |   (0w1, 0w6, 0w6) => print "fdecstp"
        |   (0w3, 0w4, 0w2) => print "fnclex"
        |   (0w5, 0w2, rno) => print ("fst \tst(" ^ Word8.toString rno ^ ")")
        |   (0w5, 0w3, rno) => print ("fstp\tst(" ^ Word8.toString rno ^ ")")
        |   (0w1, 0w0, rno) => print ("fld \tst(" ^ Word8.toString rno ^ ")")
        |   (0w1, 0w1, rno) => print ("fxch\tst(" ^ Word8.toString rno ^ ")")
        |   (0w0, 0w3, rno) => print ("fcomp\tst(" ^ Word8.toString rno ^ ")")
        |   (0w0, 0w0, rno) => print ("fadd\tst,st(" ^ Word8.toString rno ^ ")")
        |   (0w0, 0w1, rno) => print ("fmul\tst,st(" ^ Word8.toString rno ^ ")")
        |   (0w0, 0w4, rno) => print ("fsub\tst,st(" ^ Word8.toString rno ^ ")")
        |   (0w0, 0w5, rno) => print ("fsubr\tst,st(" ^ Word8.toString rno ^ ")")
        |   (0w0, 0w6, rno) => print ("fdiv\tst,st(" ^ Word8.toString rno ^ ")")
        |   (0w0, 0w7, rno) => print ("fdivr\tst,st(" ^ Word8.toString rno ^ ")")
        |   (0w5, 0w0, rno) => print ("ffree\tst(" ^ Word8.toString rno ^ ")")
        |   _ => (printValue(Word8.toLargeInt opByte); printValue(Word8.toLargeInt opByte2));
        (* The modrm byte was the second opcode byte: step over it. *)
        ptr +:= 0w1
    )
    else (* mod = 00, 01, 10: memory-operand forms; printEA consumes the modrm. *)
    (
        case (escNo, nnn) of
            (0w0, 0w0) => (print "fadd\t"; printEA(rex, SZDWord)) (* Single precision. *)
        |   (0w0, 0w1) => (print "fmul\t"; printEA(rex, SZDWord))
        |   (0w0, 0w3) => (print "fcomp\t"; printEA(rex, SZDWord))
        |   (0w0, 0w4) => (print "fsub\t"; printEA(rex, SZDWord))
        |   (0w0, 0w5) => (print "fsubr\t"; printEA(rex, SZDWord))
        |   (0w0, 0w6) => (print "fdiv\t"; printEA(rex, SZDWord))
        |   (0w0, 0w7) => (print "fdivr\t"; printEA(rex, SZDWord))
        |   (0w1, 0w0) => (print "fld \t"; printEA(rex, SZDWord))
        |   (0w1, 0w2) => (print "fst\t"; printEA(rex, SZDWord))
        |   (0w1, 0w3) => (print "fstp\t"; printEA(rex, SZDWord))
        |   (0w1, 0w5) => (print "fldcw\t"; printEA(rex, SZWord)) (* Control word is 16 bits *)
        |   (0w1, 0w7) => (print "fstcw\t"; printEA(rex, SZWord)) (* Control word is 16 bits *)
        |   (0w3, 0w0) => (print "fild\t"; printEA(rex, SZDWord)) (* 32-bit int. *)
        |   (0w7, 0w5) => (print "fild\t"; printEA(rex, SZQWord)) (* 64-bit int. *)
        |   (0w3, 0w3) => (print "fistp\t"; printEA(rex, SZDWord)) (* 32-bit int. *)
        |   (0w7, 0w7) => (print "fistp\t"; printEA(rex, SZQWord)) (* 64-bit int. *)
        |   (0w4, 0w0) => (print "fadd\t"; printEA(rex, SZQWord)) (* Double precision. *)
        |   (0w4, 0w1) => (print "fmul\t"; printEA(rex, SZQWord))
        |   (0w4, 0w3) => (print "fcomp\t"; printEA(rex, SZQWord))
        |   (0w4, 0w4) => (print "fsub\t"; printEA(rex, SZQWord))
        |   (0w4, 0w5) => (print "fsubr\t"; printEA(rex, SZQWord))
        |   (0w4, 0w6) => (print "fdiv\t"; printEA(rex, SZQWord))
        |   (0w4, 0w7) => (print "fdivr\t"; printEA(rex, SZQWord))
        |   (0w5, 0w0) => (print "fld \t"; printEA(rex, SZQWord))
        |   (0w5, 0w2) => (print "fst\t"; printEA(rex, SZQWord))
        |   (0w5, 0w3) => (print "fstp\t"; printEA(rex, SZQWord))
        |   _ => (printValue(Word8.toLargeInt opByte); printValue(Word8.toLargeInt opByte2))
    )
end
(* Print a jump/call with a 32-bit displacement, relative to the byte
   following the displacement. *)
fun printJmp32 oper =
let
    val valu = get32s (!ptr, seg) before (ptr +:= 0w4)
in
    print oper; print "\t";
    print (Word.fmt StringCvt.HEX (!ptr + Word.fromLargeInt valu))
end
(* Print the names of the registers whose bits are set in "mask",
   separated by single spaces. *)
fun printMask mask =
let
    val wordMask = Word.fromInt mask
    (* True if bit n of the mask is set. *)
    fun bitSet n = (wordMask andb (0w1 << Word.fromInt n)) <> 0w0
    fun loop n =
        if n = regs
        then ()
        else
        (
            if bitSet n then (print(regRepr(regN n)); print " ") else ();
            loop(n+1)
        )
in
    loop 0
end
in
    if procName = "" (* No name *) then print "?" else print procName;
    print ":\n";
    (* Decode one instruction per iteration until the HLT end marker. *)
    while get8u (!ptr, seg) <> 0wxf4 (* HLT. *) do
    let
        val () = print (Word.fmt StringCvt.HEX (!ptr)) (* The address in hex. *)
        val () = print "\t"
        (* See if we have a lock prefix. *)
        val () =
            if get8u (!ptr, seg) = 0wxF0
            then (print "lock "; ptr := !ptr + 0w1)
            else ()
        (* Legacy prefix (F2/F3/66), if any: needed to decode SSE2 and
           size-overridden instructions. *)
        val legacyPrefix =
        let
            val p = get8u (!ptr, seg)
        in
            if p = 0wxF2 orelse p = 0wxF3 orelse p = 0wx66
            then (ptr := !ptr + 0w1; p)
            else 0wx0
        end
        (* See if we have a REX byte. *)
        val rex =
        let
            val b = get8u (!ptr, seg);
        in
            if b >= 0wx40 andalso b <= 0wx4f
            then (ptr := !ptr + 0w1; b)
            else 0w0
        end
        val rexW = (rex andb8 0wx8) <> 0w0
        val rexR = (rex andb8 0wx4) <> 0w0
        val rexB = (rex andb8 0wx1) <> 0w0
        val opByte = get8u (!ptr, seg) before ptr +:= 0w1
        val sizeFromRexW = if rexW then SZQWord else SZDWord
    in
        (* Dispatch on the primary opcode byte. *)
        case opByte of
            0wx03 => printGvEv (opByte, rex, rexR, sizeFromRexW)
        |   0wx0b => printGvEv (opByte, rex, rexR, sizeFromRexW)
        |   0wx0f => (* ESCAPE *)
            let
                (* Opcode is in next byte. *)
                val opByte2 = codeVecGet (seg, !ptr)
                val () = (ptr +:= 0w1)
                (* Print a conditional move: reg field then effective address. *)
                fun printcmov movop =
                let
                    val nb = codeVecGet (seg, !ptr)
                    val reg = (nb >>- 0w3) andb8 0w7
                in
                    print movop;
                    print "\t";
                    print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                    print ",";
                    printEA(rex, sizeFromRexW)
                end
            in
                case legacyPrefix of
                    0w0 =>
                    (
                        case opByte2 of
                            0wx2e =>
                            let (* ucomiss doesn't have a prefix. *)
                                val nb = codeVecGet (seg, !ptr)
                                val reg = SSE2Reg((nb >>- 0w3) andb8 0w7)
                            in
                                print "ucomiss\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZDWord)
                            end
                        |   0wx40 => printcmov "cmovo"
                        |   0wx41 => printcmov "cmovno"
                        |   0wx42 => printcmov "cmovb"
                        |   0wx43 => printcmov "cmovnb"
                        |   0wx44 => printcmov "cmove"
                        |   0wx45 => printcmov "cmovne"
                        |   0wx46 => printcmov "cmovna"
                        |   0wx47 => printcmov "cmova"
                        |   0wx48 => printcmov "cmovs"
                        |   0wx49 => printcmov "cmovns"
                        |   0wx4a => printcmov "cmovp"
                        |   0wx4b => printcmov "cmovnp"
                        |   0wx4c => printcmov "cmovl"
                        |   0wx4d => printcmov "cmovge"
                        |   0wx4e => printcmov "cmovle"
                        |   0wx4f => printcmov "cmovg"
                        |   0wxB1 =>
                            let
                                val nb = codeVecGet (seg, !ptr);
                                val reg = (nb >>- 0w3) andb8 0w7
                            in
                                (* The address argument comes first in the assembly code. *)
                                print "cmpxchg\t";
                                printEA (rex, sizeFromRexW);
                                print ",";
                                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW))
                            end
                        |   0wxC1 =>
                            let
                                val nb = codeVecGet (seg, !ptr);
                                val reg = (nb >>- 0w3) andb8 0w7
                            in
                                (* The address argument comes first in the assembly code. *)
                                print "xadd\t";
                                printEA (rex, sizeFromRexW);
                                print ",";
                                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW))
                            end
                        |   0wxB6 =>
                            let
                                val nb = codeVecGet (seg, !ptr);
                                val reg = (nb >>- 0w3) andb8 0w7
                            in
                                print "movzx\t";
                                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                                print ",";
                                printEA (rex, SZByte)
                            end
                        |   0wxB7 =>
                            let
                                val nb = codeVecGet (seg, !ptr);
                                val reg = (nb >>- 0w3) andb8 0w7
                            in
                                print "movzx\t";
                                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                                print ",";
                                printEA (rex, SZWord)
                            end
                        |   0wxBE =>
                            let
                                val nb = codeVecGet (seg, !ptr);
                                val reg = (nb >>- 0w3) andb8 0w7
                            in
                                print "movsx\t";
                                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                                print ",";
                                printEA (rex, SZByte)
                            end
                        |   0wxBF =>
                            let
                                val nb = codeVecGet (seg, !ptr);
                                val reg = (nb >>- 0w3) andb8 0w7
                            in
                                print "movsx\t";
                                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                                print ",";
                                printEA (rex, SZWord)
                            end
                        |   0wxAE =>
                            let
                                (* Opcode is determined by the next byte. *)
                                val opByte2 = codeVecGet (seg, !ptr);
                                val nnn = (opByte2 >>- 0w3) andb8 0w7
                            in
                                case nnn of
                                    0wx2 => (print "ldmxcsr\t"; printEA(rex, SZDWord))
                                |   0wx3 => (print "stmxcsr\t"; printEA(rex, SZDWord))
                                |   _ => (printValue(Word8.toLargeInt opByte); printValue(Word8.toLargeInt opByte2))
                            end
                        |   0wxAF =>
                            let
                                val nb = codeVecGet (seg, !ptr);
                                val reg = (nb >>- 0w3) andb8 0w7
                            in
                                print "imul\t";
                                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                                print ",";
                                printEA (rex, sizeFromRexW)
                            end
                        (* Long-form (32-bit displacement) conditional jumps. *)
                        |   0wx80 => printJmp32 "jo  "
                        |   0wx81 => printJmp32 "jno "
                        |   0wx82 => printJmp32 "jb  "
                        |   0wx83 => printJmp32 "jnb "
                        |   0wx84 => printJmp32 "je  "
                        |   0wx85 => printJmp32 "jne "
                        |   0wx86 => printJmp32 "jna "
                        |   0wx87 => printJmp32 "ja  "
                        |   0wx88 => printJmp32 "js  "
                        |   0wx89 => printJmp32 "jns "
                        |   0wx8a => printJmp32 "jp  "
                        |   0wx8b => printJmp32 "jnp "
                        |   0wx8c => printJmp32 "jl  "
                        |   0wx8d => printJmp32 "jge "
                        |   0wx8e => printJmp32 "jle "
                        |   0wx8f => printJmp32 "jg  "
                        (* setcc: set a byte on condition. *)
                        |   0wx90 => (print "seto\t"; printEA (rex, SZByte))
                        |   0wx91 => (print "setno\t"; printEA (rex, SZByte))
                        |   0wx92 => (print "setb\t"; printEA (rex, SZByte))
                        |   0wx93 => (print "setnb\t"; printEA (rex, SZByte))
                        |   0wx94 => (print "sete\t"; printEA (rex, SZByte))
                        |   0wx95 => (print "setne\t"; printEA (rex, SZByte))
                        |   0wx96 => (print "setna\t"; printEA (rex, SZByte))
                        |   0wx97 => (print "seta\t"; printEA (rex, SZByte))
                        |   0wx98 => (print "sets\t"; printEA (rex, SZByte))
                        |   0wx99 => (print "setns\t"; printEA (rex, SZByte))
                        |   0wx9a => (print "setp\t"; printEA (rex, SZByte))
                        |   0wx9b => (print "setnp\t"; printEA (rex, SZByte))
                        |   0wx9c => (print "setl\t"; printEA (rex, SZByte))
                        |   0wx9d => (print "setge\t"; printEA (rex, SZByte))
                        |   0wx9e => (print "setle\t"; printEA (rex, SZByte))
                        |   0wx9f => (print "setg\t"; printEA (rex, SZByte))
                        |   _ => (print "esc\t"; printValue(Word8.toLargeInt opByte2))
                    )
                |   0wxf2 => (* SSE2 instruction: scalar double precision. *)
                    let
                        val nb = codeVecGet (seg, !ptr)
                        val rr = (nb >>- 0w3) andb8 0w7
                        val reg = SSE2Reg rr
                    in
                        case opByte2 of
                            0wx10 => ( print "movsd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx11 => ( print "movsd\t"; printEAxmm(rex, SZQWord); print ","; print(xmmRegRepr reg) )
                        |   0wx2a => ( print "cvtsi2sd\t"; print(xmmRegRepr reg); print ","; printEA(rex, sizeFromRexW) )
                        |   0wx2c =>
                            ( print "cvttsd2si\t"; print (genRegRepr (mkReg(rr, rexR), sizeFromRexW)); print ","; printEAxmm(rex, sizeFromRexW) )
                        |   0wx2d =>
                            ( print "cvtsd2si\t"; print (genRegRepr (mkReg(rr, rexR), sizeFromRexW)); print ","; printEAxmm(rex, sizeFromRexW) )
                        |   0wx58 => ( print "addsd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx59 => ( print "mulsd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx5a => ( print "cvtsd2ss\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx5c => ( print "subsd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx5e => ( print "divsd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   b => (print "F2\n"; print "0F\n"; print(Word8.fmt StringCvt.HEX b))
                    end
                |   0wxf3 => (* SSE2 instruction: scalar single precision. *)
                    let
                        val nb = codeVecGet (seg, !ptr)
                        val rr = (nb >>- 0w3) andb8 0w7
                        val reg = SSE2Reg rr
                    in
                        case opByte2 of
                            0wx10 => ( print "movss\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZDWord) )
                        |   0wx11 => ( print "movss\t"; printEAxmm(rex, SZDWord); print ","; print(xmmRegRepr reg) )
                        |   0wx2a => ( print "cvtsi2ss\t"; print(xmmRegRepr reg); print ","; printEA(rex, sizeFromRexW) )
                        |   0wx2c =>
                            ( print "cvttss2si\t"; print (genRegRepr (mkReg(rr, rexR), sizeFromRexW)); print ","; printEAxmm(rex, sizeFromRexW) )
                        |   0wx2d =>
                            ( print "cvtss2si\t"; print (genRegRepr (mkReg(rr, rexR), sizeFromRexW)); print ","; printEAxmm(rex, sizeFromRexW) )
                        |   0wx5a => ( print "cvtss2sd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZDWord) )
                        |   0wx58 => ( print "addss\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZDWord) )
                        |   0wx59 => ( print "mulss\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZDWord) )
                        |   0wx5c => ( print "subss\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZDWord) )
                        |   0wx5e => ( print "divss\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZDWord) )
                        |   b => (print "F3\n"; print "0F\n"; print(Word8.fmt StringCvt.HEX b))
                    end
                |   0wx66 => (* SSE2 instruction: packed double / integer moves. *)
                    let
                        val nb = codeVecGet (seg, !ptr)
                        val reg = SSE2Reg((nb >>- 0w3) andb8 0w7)
                    in
                        case opByte2 of
                            0wx2e => ( print "ucomisd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx54 => ( print "andpd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx57 => ( print "xorpd\t"; print(xmmRegRepr reg); print ","; printEAxmm(rex, SZQWord) )
                        |   0wx6e => ( print (if rexW then "movq\t" else "movd\t"); print(xmmRegRepr reg); print ","; printEA(rex, sizeFromRexW) )
                        |   0wx7e => ( print (if rexW then "movq\t" else "movd\t"); printEA(rex, sizeFromRexW); print ","; print(xmmRegRepr reg) )
                        |   0wx73 => ( print "psrldq\t"; printEAxmm(rex, SZQWord); print ","; print8 ())
                        |   b => (print "66\n"; print "0F\n"; print(Word8.fmt StringCvt.HEX b))
                    end
                |   _ => (print "esc\t"; printValue(Word8.toLargeInt opByte2))
            end (* ESCAPE *)
        |   0wx13 => printGvEv (opByte, rex, rexR, sizeFromRexW)
        |   0wx1b => printGvEv (opByte, rex, rexR, sizeFromRexW)
        |   0wx23 => printGvEv (opByte, rex, rexR, sizeFromRexW)
        |   0wx2b => printGvEv (opByte, rex, rexR, sizeFromRexW)
        |   0wx33 => printGvEv (opByte, rex, rexR, sizeFromRexW)
        |   0wx3b => printGvEv (opByte, rex, rexR, sizeFromRexW)
        (* Push and Pop. These are 64-bit on X86/64 whether there is REX prefix or not. *)
        |   0wx50 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx51 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx52 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx53 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx54 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx55 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx56 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx57 => print ("push\t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx58 => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx59 => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx5a => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx5b => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx5c => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx5d => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx5e => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx5f => print ("pop \t" ^ genRegRepr (mkReg (opByte mod 0w8, rexB), sz32_64))
        |   0wx63 => (* MOVSXD. This is ARPL in 32-bit mode but that's never used here. *)
            let
                val nb = codeVecGet (seg, !ptr)
                val reg = (nb >>- 0w3) andb8 0w7
            in
                print "movsxd\t";
                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                print ",";
                printEA(rex, SZDWord)
            end
        |   0wx68 => (print "push\t"; print32 ())
        |   0wx69 =>
            let
                (* Register is in next byte. *)
                val nb = codeVecGet (seg, !ptr)
                val reg = (nb >>- 0w3) andb8 0w7
            in
                print "imul\t"; print(genRegRepr (mkReg(reg, rexR), sizeFromRexW)); print ",";
                printEA(rex, sizeFromRexW); print ","; print32 ()
            end
        |   0wx6a => (print "push\t"; print8 ())
        |   0wx6b =>
            let
                (* Register is in next byte. *)
                val nb = codeVecGet (seg, !ptr)
                val reg = (nb >>- 0w3) andb8 0w7
            in
                print "imul\t"; print(genRegRepr (mkReg(reg, rexR), sizeFromRexW)); print ",";
                printEA(rex, sizeFromRexW); print ","; print8 ()
            end
        (* Short-form (8-bit displacement) conditional jumps. *)
        |   0wx70 => (print "jo  \t"; printJmp())
        |   0wx71 => (print "jno \t"; printJmp())
        |   0wx72 => (print "jb  \t"; printJmp())
        |   0wx73 => (print "jnb \t"; printJmp())
        |   0wx74 => (print "je  \t"; printJmp())
        |   0wx75 => (print "jne \t"; printJmp())
        |   0wx76 => (print "jna \t"; printJmp())
        |   0wx77 => (print "ja  \t"; printJmp())
        |   0wx78 => (print "js  \t"; printJmp())
        |   0wx79 => (print "jns \t"; printJmp())
        |   0wx7a => (print "jp  \t"; printJmp())
        |   0wx7b => (print "jnp \t"; printJmp())
        |   0wx7c => (print "jl  \t"; printJmp())
        |   0wx7d => (print "jge \t"; printJmp())
        |   0wx7e => (print "jle \t"; printJmp())
        |   0wx7f => (print "jg  \t"; printJmp())
        |   0wx80 => (* Group1_8_a *)
            let (* Memory, byte constant *)
                (* Opcode is determined by next byte. *)
                val nb = Word8.toInt (codeVecGet (seg, !ptr))
            in
                printArith ((nb div 8) mod 8);
                print "\t";
                printEA(rex, SZByte);
                print ",";
                print8 ()
            end
        |   0wx81 =>
            let (* Memory, 32-bit constant *)
                (* Opcode is determined by next byte. *)
                val nb = Word8.toInt (codeVecGet (seg, !ptr))
            in
                printArith ((nb div 8) mod 8);
                print "\t";
                printEA(rex, sizeFromRexW);
                print ",";
                print32 ()
            end
        |   0wx83 =>
            let (* Word memory, 8-bit constant *)
                (* Opcode is determined by next byte. *)
                val nb = Word8.toInt (codeVecGet (seg, !ptr))
            in
                printArith ((nb div 8) mod 8);
                print "\t";
                printEA(rex, sizeFromRexW);
                print ",";
                print8 ()
            end
        |   0wx87 =>
            let (* xchng *)
                (* Register is in next byte. *)
                val nb = codeVecGet (seg, !ptr)
                val reg = (nb >>- 0w3) andb8 0w7
            in
                print "xchng \t";
                printEA(rex, sizeFromRexW);
                print ",";
                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW))
            end
        |   0wx88 =>
            let (* mov eb,gb i.e a store *)
                (* Register is in next byte. *)
                val nb = Word8.toInt (codeVecGet (seg, !ptr));
                val reg = (nb div 8) mod 8;
            in
                print "mov \t";
                printEA(rex, SZByte);
                print ",";
                if rexR
                then print ("r" ^ Int.toString(reg+8) ^ "B")
                else case reg of
                    0 => print "al"
                |   1 => print "cl"
                |   2 => print "dl"
                |   3 => print "bl"
                (* If there is a REX byte these select the low byte of the registers. *)
                |   4 => print (if rex = 0w0 then "ah" else "sil")
                |   5 => print (if rex = 0w0 then "ch" else "dil")
                |   6 => print (if rex = 0w0 then "dh" else "bpl")
                |   7 => print (if rex = 0w0 then "bh" else "spl")
                |   _ => print ("r" ^ Int.toString reg)
            end
        |   0wx89 =>
            let (* mov ev,gv i.e. a store *)
                (* Register is in next byte. *)
                val nb = codeVecGet (seg, !ptr)
                val reg = (nb >>- 0w3) andb8 0w7
            in
                print "mov \t";
                (* This may have an opcode prefix. *)
                printEA(rex, if legacyPrefix = 0wx66 then SZWord else sizeFromRexW);
                print ",";
                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW))
            end
        |   0wx8b =>
            let (* mov gv,ev i.e. a load *)
                (* Register is in next byte. *)
                val nb = codeVecGet (seg, !ptr)
                val reg = (nb >>- 0w3) andb8 0w7
            in
                print "mov \t";
                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                print ",";
                printEA(rex, sizeFromRexW)
            end
        |   0wx8d =>
            let (* lea gv.M *)
                (* Register is in next byte. *)
                val nb = codeVecGet (seg, !ptr)
                val reg = (nb >>- 0w3) andb8 0w7
            in
                print "lea \t";
                print (genRegRepr (mkReg(reg, rexR), sizeFromRexW));
                print ",";
                printEA(rex, sizeFromRexW)
            end
        |   0wx8f => (print "pop \t"; printEA(rex, sz32_64))
        |   0wx90 => if legacyPrefix = 0wxf3 then print "pause" else print "nop"
        |   0wx99 => if rexW then print "cqo" else print "cdq"
        |   0wx9e => print "sahf\n"
        |   0wxa4 => (if legacyPrefix = 0wxf3 then print "rep " else (); print "movsb")
        |   0wxa5 => (if legacyPrefix = 0wxf3 then print "rep " else (); print "movsl")
        |   0wxa6 => (if legacyPrefix = 0wxf3 then print "repe " else (); print "cmpsb")
        |   0wxa8 => (print "test\tal,"; print8 ())
        |   0wxaa => (if legacyPrefix = 0wxf3 then print "rep " else (); print "stosb")
        |   0wxab =>
            (
                if legacyPrefix = 0wxf3 then print "rep " else ();
                if rexW then print "stosq" else print "stosl"
            )
        (* B8+r: move immediate to register. *)
        |   0wxb8 => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxb9 => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxba => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxbb => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxbc => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxbd => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxbe => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxbf => printMovCToR (opByte, sizeFromRexW, rexB)
        |   0wxc1 => (* Group2_8_A *) printShift (opByte, rex, sizeFromRexW)
        |   0wxc2 => (print "ret \t"; print16 ())
        |   0wxc3 => print "ret"
        |   0wxc6 => (* move 8-bit constant to memory *)
            (
                print "mov \t";
                printEA(rex, SZByte);
                print ",";
                print8 ()
            )
        |   0wxc7 => (* move 32/64-bit constant to memory *)
            (
                print "mov \t";
                printEA(rex, sizeFromRexW);
                print ",";
                print32 ()
            )
        |   0wxca => (* Register mask *)
            let
                val mask = get16u (!ptr, seg) before (ptr +:= 0w2)
            in
                print "SAVE\t";
                printMask mask
            end
        |   0wxcd => (* Register mask *)
            let
                val mask = get8u (!ptr, seg) before (ptr +:= 0w1)
            in
                print "SAVE\t";
                printMask(Word8.toInt mask)
            end
        |   0wxd1 => (* Group2_1_A *) printShift (opByte, rex, sizeFromRexW)
        |   0wxd3 => (* Group2_CL_A *) printShift (opByte, rex, sizeFromRexW)
        |   0wxd8 => printFloat (opByte, rex) (* Floating point escapes *)
        |   0wxd9 => printFloat (opByte, rex)
        |   0wxda => printFloat (opByte, rex)
        |   0wxdb => printFloat (opByte, rex)
        |   0wxdc => printFloat (opByte, rex)
        |   0wxdd => printFloat (opByte, rex)
        |   0wxde => printFloat (opByte, rex)
        |   0wxdf => printFloat (opByte, rex)
        |   0wxe8 =>
            let (* 32-bit relative call. *)
                val valu = get32s (!ptr, seg) before (ptr +:= 0w4)
            in
                print "call\t";
                print (Word.fmt StringCvt.HEX (!ptr + Word.fromLargeInt valu))
            end
        |   0wxe9 =>
            let (* 32-bit relative jump. *)
                val valu = get32s (!ptr, seg) before (ptr +:= 0w4)
            in
                print "jmp \t";
                print (Word.fmt StringCvt.HEX (!ptr + Word.fromLargeInt valu))
            end
        |   0wxeb => (print "jmp \t"; printJmp())
        |   0wxf4 => print "hlt" (* Marker to indicate end-of-code. *)
        |   0wxf6 => (* Group3_a *)
            let
                (* Opcode is determined by next byte. *)
                val nb = Word8.toInt (codeVecGet (seg, !ptr))
                val opc = (nb div 8) mod 8
            in
                print
                    (case opc of
                        0 => "test"
                    |   3 => "neg"
                    |   _ => "???"
                    );
                print "\t";
                printEA(rex, SZByte);
                if opc = 0 then (print ","; print8 ()) else ()
            end
        |   0wxf7 => (* Group3_A *)
            let
                (* Opcode is determined by next byte. *)
                val nb = Word8.toInt (codeVecGet (seg, !ptr))
                val opc = (nb div 8) mod 8
            in
                print
                    (case opc of
                        0 => "test"
                    |   3 => "neg "
                    |   4 => "mul "
                    |   5 => "imul"
                    |   6 => "div "
                    |   7 => "idiv"
                    |   _ => "???"
                    );
                print "\t";
                printEA(rex, sizeFromRexW);
                (* Test has an immediate operand. It's 32-bits even in 64-bit mode. *)
                if opc = 0 then (print ","; print32 ()) else ()
            end
        |   0wxff => (* Group5 *)
            let
                (* Opcode is determined by next byte. *)
                val nb = Word8.toInt (codeVecGet (seg, !ptr))
                val opc = (nb div 8) mod 8
            in
                print
                    (case opc of
                        2 => "call"
                    |   4 => "jmp "
                    |   6 => "push"
                    |   _ => "???"
                    );
                print "\t";
                printEA(rex, sz32_64) (* None of the cases we use need a prefix. *)
            end
        |   _ => print(Word8.fmt StringCvt.HEX opByte);
        print "\n"
    end; (* end of while loop *)
    print "\n"
end (* printCode *);
(* Although this is used locally it must be defined at the top level
   otherwise a new RTS function will be compiled every time the
   containing function is called.  Sorts an array of (address, word)
   pairs in place; used to deduplicate large sets of address constants. *)
val sortFunction: (machineWord * word) array -> bool =
    RunCall.rtsCallFast1 "PolySortArrayOfAddresses"
(* This actually does the final code-generation. *)
fun generateCode
{ops=operations,
code=cvec as Code{procName, printAssemblyCode, printStream, profileObject, ...},
labelCount, resultClosure} : unit =
let
val (expanded, newLabelCount) = expandComplexOperations (operations, labelCount)
val () = printLowLevelCode(expanded, cvec)
local
val initialBytesList = codeGenerate expanded
in
(* Fixup labels and shrink long branches to short. *)
val (bytesList, codeSize) = fixupLabels(expanded, initialBytesList, newLabelCount)
end
local
(* Extract the constants and the location of the references from the code. *)
val (inlineConstants, addressConstants, nonAddressConstants) = getConstants(expanded, bytesList)
(* Sort the non-address constants to remove duplicates. There don't seem to be
many in practice.
Since we're not actually interested in the order but only
sorting to remove duplicates we can use a stripped-down Quicksort. *)
fun sort([], out) = out
| sort((addr, median) :: tl, out) = partition(median, tl, [addr], [], [], out)
and partition(median, [], addrs, less, greater, out) =
sort(less, sort(greater, (addrs, median) :: out))
| partition(median, (entry as (addr, value)) :: tl, addrs, less, greater, out) =
if value = median
then partition(median, tl, addr::addrs, less, greater, out)
else if value < median
then partition(median, tl, addrs, entry :: less, greater, out)
else partition(median, tl, addrs, less, entry :: greater, out)
(* Non-address constants. We can't use any ordering on them because a GC could
change the values half way through the sort. Instead we use a simple search
for a small number of constants and use an RTS call for larger numbers. We
want to avoid quadratic cost when there are large numbers. *)
val sortedConstants =
if List.length addressConstants < 10
then
let
fun findDups([], out) = out
| findDups((addr, value) :: tl, out) =
let
fun partition(e as (a, v), (eq, neq)) =
if PolyML.pointerEq(value, v)
then (a :: eq, neq)
else (eq, e :: neq)
val (eqAddr, neq) = List.foldl partition ([addr], []) tl
in
findDups(neq, (eqAddr, value) :: out)
end
in
findDups(addressConstants, [])
end
else
let
fun swap (a, b) = (b, a)
val arrayToSort: (machineWord * word) array =
Array.fromList (List.map swap addressConstants)
val _ = sortFunction arrayToSort
fun makeList((v, a), []) = [([a], v)]
| makeList((v, a), l as (aa, vv) :: tl) =
if PolyML.pointerEq(v, vv)
then (a :: aa, vv) :: tl
else ([a], v) :: l
in
Array.foldl makeList [] arrayToSort
end
in
val inlineConstants = inlineConstants
and addressConstants = sortedConstants
and nonAddressConstants = sort(nonAddressConstants, [])
end
(* Get the number of constants that need to be added to the address area. *)
val constsInConstArea = List.length addressConstants
local
(* Add one byte for the HLT and round up to a number of words. *)
val endOfCode = (codeSize+nativeWordSize) div nativeWordSize * (nativeWordSize div wordSize)
val numOfNonAddrWords = Word.fromInt(List.length nonAddressConstants)
(* Each entry in the non-address constant area is 8 bytes. *)
val intSize = 0w8 div wordSize
in
val endOfByteArea = endOfCode + numOfNonAddrWords * intSize
(* +4 for no of consts. function name, profile object and offset to start of consts. *)
val segSize = endOfByteArea + Word.fromInt constsInConstArea + 0w4
end
(* Create a byte vector and copy the data in. This is a byte area and not scanned
by the GC so cannot contain any addresses. *)
val byteVec = byteVecMake segSize
val ic = ref 0w0
local
fun genByte (ival: Word8.word) = set8u (ival, !ic, byteVec) before ic := !ic + 0w1
in
fun genBytes l = Word8Vector.app (fn i => genByte i) l
val () = List.app (fn b => genBytes b) bytesList
val () = genBytes(Word8Vector.fromList(opCodeBytes(HLT, NONE))) (* Marker - this is used by ScanConstants in the RTS. *)
end
(* Align ic onto a fullword boundary. *)
val () = ic := ((!ic + nativeWordSize - 0w1) andb ~nativeWordSize)
(* Copy the non-address constants. These are only used in 64-bit mode and are
either real constants or integers that are too large to fit in a 32-bit
inline constants. We don't use this for real constants in 32-bit mode because
we don't have relative addressing. Instead a real constant is the inline
address of a boxed real number. *)
local
fun putNonAddrConst(addrs, constant) =
let
val addrOfConst = ! ic
val () = genBytes(Word8Vector.fromList(largeWordToBytes(constant, 8)))
fun setAddr addr = set32s(Word.toLargeInt(addrOfConst - addr - 0w4), addr, byteVec)
in
List.app setAddr addrs
end
in
val () = List.app putNonAddrConst nonAddressConstants
end
val _ = bytesToWords(! ic) = endOfByteArea orelse raise InternalError "mismatch"
(* Put in the number of constants. This must go in before we actually put
in any constants. In 32-bit mode there are only two constants: the
function name and the profile object.
All other constants are in the code. *)
local
    (* Byte address of the very last word of the segment. *)
    val lastWord = wordsToBytes(endOfByteArea + 0w3 + Word.fromInt constsInConstArea)
    (* Store count bytes of ival, least-significant byte first, at offset. *)
    fun storeLE(ival, offset, count) =
        if count = 0
        then ()
        else
        (
            byteVecSet(byteVec, offset, Word8.fromLargeInt(ival mod 256));
            storeLE(ival div 256, offset+0w1, count-1)
        )
in
    (* The constant count: the function name and profile object plus any
       constants placed in the constant area. *)
    val () = storeLE(LargeInt.fromInt(2 + constsInConstArea), wordsToBytes endOfByteArea, Word.toInt wordSize)
    (* Set the last word of the code to the (negative) byte offset of the start of the code area
       from the end of this word. *)
    val () = storeLE(Word.toLargeIntX(wordsToBytes endOfByteArea - lastWord), lastWord, Word.toInt wordSize)
end;
(* We've put in all the byte data so it is safe to convert this to a mutable code
   cell that can contain addresses and will be scanned by the GC. *)
val codeSeg = byteVecToCodeVec(byteVec, resultClosure)
(* Various RTS functions assume that the first constant is the function name.
   The profiler assumes that the second word is the address of the mutable that
   contains the profile count. *)
val () = codeVecPutWord (codeSeg, endOfByteArea + 0w1, toMachineWord procName)
(* Next the profile object. *)
val () = codeVecPutWord (codeSeg, endOfByteArea + 0w2, profileObject)
in
let
(* Store count bytes of b, least-significant byte first, into the code cell
   starting at byte address addr. *)
fun setBytes(_, _, 0w0) = ()
| setBytes(b, addr, count) =
(
codeVecSet (codeSeg, addr, wordToWord8 b);
setBytes(b >> 0w8, addr+0w1, count-0w1)
)
(* Inline constants - native 32-bit only plus one special case in 32-in-64 *)
fun putInlConst (addrs, SelfAddress) =
(* Self address goes inline. *)
codeVecPutConstant (codeSeg, addrs, toMachineWord(codeVecAddr codeSeg), ConstAbsolute)
| putInlConst (addrs, InlineAbsoluteAddress m) =
codeVecPutConstant (codeSeg, addrs, m, ConstAbsolute)
| putInlConst (addrs, InlineRelativeAddress m) =
codeVecPutConstant (codeSeg, addrs, m, ConstX86Relative)
val _ = List.app putInlConst inlineConstants
(* Address constants - native 64-bit and 32-in-64. *)
fun putAddrConst ((addrs, m), constAddr) =
(* Put the constant in the constant area and set the original address
   to be the relative offset to the constant itself. *)
(
codeVecPutWord (codeSeg, constAddr, m);
(* Put in the 32-bit offset - always unsigned since the destination
   is after the reference.  The - 0w4 accounts for the offset being
   measured from the end of the 4-byte field itself. *)
List.app(fn addr => setBytes(constAddr * wordSize - addr - 0w4, addr, 0w4)) addrs;
constAddr+0w1
)
(* Put the constants. Any values in the constant area start at +3 i.e. after the profile. *)
val _ = List.foldl putAddrConst (endOfByteArea+0w3) addressConstants
(* Optionally dump the generated code for debugging. *)
val () =
if printAssemblyCode
then (* print out the code *)
(
printCode(cvec, codeSeg);
printStream "\n\n"
)
else ()
in
(* Finally lock the code: no further mutation is permitted after this. *)
codeVecLock(codeSeg, resultClosure)
end (* the result *)
end (* generateCode *)
(* Re-export the principal types of this structure so that clients can
   impose sharing constraints between functor parameters built from it. *)
structure Sharing =
struct
type code = code
and reg = reg
and genReg = genReg
and fpReg = fpReg
and addrs = addrs
and operation = operation
and regSet = RegSet.regSet
and label = label
and branchOps = branchOps
and arithOp = arithOp
and shiftType = shiftType
and repOps = repOps
and fpOps = fpOps
and fpUnaryOps = fpUnaryOps
and sse2Operations = sse2Operations
and opSize = opSize
and closureRef = closureRef
end
end (* struct *) (* CODECONS *);